{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.998642226748133,
"eval_steps": 500,
"global_step": 368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05431093007467753,
"grad_norm": 1.0013175010681152,
"learning_rate": 2e-05,
"loss": 1.8305,
"step": 10
},
{
"epoch": 0.10862186014935506,
"grad_norm": 0.6650476455688477,
"learning_rate": 4e-05,
"loss": 1.6895,
"step": 20
},
{
"epoch": 0.1629327902240326,
"grad_norm": 0.2262110561132431,
"learning_rate": 6e-05,
"loss": 1.4921,
"step": 30
},
{
"epoch": 0.2172437202987101,
"grad_norm": 0.22021710872650146,
"learning_rate": 8e-05,
"loss": 1.4074,
"step": 40
},
{
"epoch": 0.27155465037338766,
"grad_norm": 0.18622460961341858,
"learning_rate": 0.0001,
"loss": 1.2593,
"step": 50
},
{
"epoch": 0.3258655804480652,
"grad_norm": 0.18928822875022888,
"learning_rate": 0.00012,
"loss": 1.1828,
"step": 60
},
{
"epoch": 0.3801765105227427,
"grad_norm": 4.061775207519531,
"learning_rate": 0.00014,
"loss": 1.0697,
"step": 70
},
{
"epoch": 0.4344874405974202,
"grad_norm": 0.20349960029125214,
"learning_rate": 0.00016,
"loss": 1.0102,
"step": 80
},
{
"epoch": 0.48879837067209775,
"grad_norm": 0.23872722685337067,
"learning_rate": 0.00018,
"loss": 0.9941,
"step": 90
},
{
"epoch": 0.5431093007467753,
"grad_norm": 0.20515283942222595,
"learning_rate": 0.0002,
"loss": 0.9608,
"step": 100
},
{
"epoch": 0.5974202308214528,
"grad_norm": 0.22473494708538055,
"learning_rate": 0.00019931371771625544,
"loss": 0.9308,
"step": 110
},
{
"epoch": 0.6517311608961304,
"grad_norm": 0.24142144620418549,
"learning_rate": 0.0001972642905324813,
"loss": 0.9381,
"step": 120
},
{
"epoch": 0.7060420909708078,
"grad_norm": 0.260145902633667,
"learning_rate": 0.00019387984816003867,
"loss": 0.8955,
"step": 130
},
{
"epoch": 0.7603530210454854,
"grad_norm": 0.22235779464244843,
"learning_rate": 0.00018920684425573865,
"loss": 0.8667,
"step": 140
},
{
"epoch": 0.814663951120163,
"grad_norm": 0.23329713940620422,
"learning_rate": 0.00018330941881540915,
"loss": 0.873,
"step": 150
},
{
"epoch": 0.8689748811948405,
"grad_norm": 0.2553715109825134,
"learning_rate": 0.0001762685178110382,
"loss": 0.8651,
"step": 160
},
{
"epoch": 0.923285811269518,
"grad_norm": 0.22536128759384155,
"learning_rate": 0.0001681807821550438,
"loss": 0.8504,
"step": 170
},
{
"epoch": 0.9775967413441955,
"grad_norm": 0.22558774054050446,
"learning_rate": 0.00015915722124135227,
"loss": 0.8414,
"step": 180
},
{
"epoch": 1.031907671418873,
"grad_norm": 0.2400912493467331,
"learning_rate": 0.00014932168926979074,
"loss": 0.8389,
"step": 190
},
{
"epoch": 1.0862186014935507,
"grad_norm": 0.23116886615753174,
"learning_rate": 0.00013880918526722497,
"loss": 0.8289,
"step": 200
},
{
"epoch": 1.140529531568228,
"grad_norm": 0.2643767297267914,
"learning_rate": 0.00012776400013875006,
"loss": 0.8037,
"step": 210
},
{
"epoch": 1.1948404616429056,
"grad_norm": 0.23654422163963318,
"learning_rate": 0.00011633773618185302,
"loss": 0.8209,
"step": 220
},
{
"epoch": 1.2491513917175832,
"grad_norm": 0.25414180755615234,
"learning_rate": 0.00010468722624699401,
"loss": 0.8327,
"step": 230
},
{
"epoch": 1.3034623217922607,
"grad_norm": 0.24420738220214844,
"learning_rate": 9.297238110547074e-05,
"loss": 0.8056,
"step": 240
},
{
"epoch": 1.3577732518669383,
"grad_norm": 0.23167012631893158,
"learning_rate": 8.13539945708319e-05,
"loss": 0.8294,
"step": 250
},
{
"epoch": 1.4120841819416157,
"grad_norm": 0.2467813491821289,
"learning_rate": 6.999153649996595e-05,
"loss": 0.7809,
"step": 260
},
{
"epoch": 1.4663951120162932,
"grad_norm": 0.2599235475063324,
"learning_rate": 5.904096396634935e-05,
"loss": 0.7995,
"step": 270
},
{
"epoch": 1.5207060420909708,
"grad_norm": 0.2679561674594879,
"learning_rate": 4.865258064851579e-05,
"loss": 0.7845,
"step": 280
},
{
"epoch": 1.5750169721656482,
"grad_norm": 0.2540304958820343,
"learning_rate": 3.8968973815020806e-05,
"loss": 0.7868,
"step": 290
},
{
"epoch": 1.629327902240326,
"grad_norm": 0.26017382740974426,
"learning_rate": 3.0123057222115836e-05,
"loss": 0.7665,
"step": 300
},
{
"epoch": 1.6836388323150033,
"grad_norm": 0.2591923177242279,
"learning_rate": 2.2236246786624792e-05,
"loss": 0.7936,
"step": 310
},
{
"epoch": 1.737949762389681,
"grad_norm": 0.2649495601654053,
"learning_rate": 1.5416794074090258e-05,
"loss": 0.7597,
"step": 320
},
{
"epoch": 1.7922606924643585,
"grad_norm": 0.25946423411369324,
"learning_rate": 9.75830047614117e-06,
"loss": 0.7954,
"step": 330
},
{
"epoch": 1.8465716225390358,
"grad_norm": 0.24905863404273987,
"learning_rate": 5.338432470956589e-06,
"loss": 0.7648,
"step": 340
},
{
"epoch": 1.9008825526137136,
"grad_norm": 0.2536468803882599,
"learning_rate": 2.2178556007054872e-06,
"loss": 0.8026,
"step": 350
},
{
"epoch": 1.955193482688391,
"grad_norm": 0.27082565426826477,
"learning_rate": 4.3940179781019055e-07,
"loss": 0.7768,
"step": 360
}
],
"logging_steps": 10,
"max_steps": 368,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.122148310240461e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}