vlac-test / checkpoint-174 /trainer_state.json
futurefantasy's picture
Upload 40 files
8b04fba verified
{
"best_global_step": 100,
"best_metric": 3.08596992,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 174,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005747126436781609,
"grad_norm": 54.19905417297271,
"learning_rate": 1.111111111111111e-06,
"loss": 1.7543538808822632,
"memory(GiB)": 10.22,
"step": 1,
"token_acc": 0.5277777777777778,
"train_speed(iter/s)": 0.067004
},
{
"epoch": 0.028735632183908046,
"grad_norm": 49.497954919310544,
"learning_rate": 5.555555555555557e-06,
"loss": 1.7060493230819702,
"memory(GiB)": 10.22,
"step": 5,
"token_acc": 0.4930555555555556,
"train_speed(iter/s)": 0.248559
},
{
"epoch": 0.05747126436781609,
"grad_norm": 14.633223810699512,
"learning_rate": 1.1111111111111113e-05,
"loss": 1.3003526687622071,
"memory(GiB)": 10.22,
"step": 10,
"token_acc": 0.65,
"train_speed(iter/s)": 0.379406
},
{
"epoch": 0.08620689655172414,
"grad_norm": 17.504341368414668,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.1191390037536622,
"memory(GiB)": 10.22,
"step": 15,
"token_acc": 0.7055555555555556,
"train_speed(iter/s)": 0.457972
},
{
"epoch": 0.11494252873563218,
"grad_norm": 5.415303862567768,
"learning_rate": 1.9991889981715696e-05,
"loss": 1.0228230476379394,
"memory(GiB)": 10.22,
"step": 20,
"token_acc": 0.7388888888888889,
"train_speed(iter/s)": 0.514038
},
{
"epoch": 0.14367816091954022,
"grad_norm": 4.302409750602581,
"learning_rate": 1.9900803279611643e-05,
"loss": 0.8856360435485839,
"memory(GiB)": 10.22,
"step": 25,
"token_acc": 0.7777777777777778,
"train_speed(iter/s)": 0.554758
},
{
"epoch": 0.1724137931034483,
"grad_norm": 5.504310065306082,
"learning_rate": 1.9709418174260523e-05,
"loss": 0.8048258781433105,
"memory(GiB)": 10.22,
"step": 30,
"token_acc": 0.8055555555555556,
"train_speed(iter/s)": 0.585649
},
{
"epoch": 0.20114942528735633,
"grad_norm": 9.75691173960977,
"learning_rate": 1.9419673459912652e-05,
"loss": 0.7117091178894043,
"memory(GiB)": 10.22,
"step": 35,
"token_acc": 0.7833333333333333,
"train_speed(iter/s)": 0.609091
},
{
"epoch": 0.22988505747126436,
"grad_norm": 4.470052209724094,
"learning_rate": 1.9034504346103825e-05,
"loss": 0.659924840927124,
"memory(GiB)": 10.22,
"step": 40,
"token_acc": 0.7597765363128491,
"train_speed(iter/s)": 0.628438
},
{
"epoch": 0.25862068965517243,
"grad_norm": 7.40586681091568,
"learning_rate": 1.8557812723014476e-05,
"loss": 0.5936851024627685,
"memory(GiB)": 10.22,
"step": 45,
"token_acc": 0.8166666666666667,
"train_speed(iter/s)": 0.645078
},
{
"epoch": 0.28735632183908044,
"grad_norm": 3.894185239193535,
"learning_rate": 1.7994427634035016e-05,
"loss": 0.5902361869812012,
"memory(GiB)": 10.22,
"step": 50,
"token_acc": 0.8100558659217877,
"train_speed(iter/s)": 0.658944
},
{
"epoch": 0.3160919540229885,
"grad_norm": 6.535408253273618,
"learning_rate": 1.7350056355963287e-05,
"loss": 0.5443444728851319,
"memory(GiB)": 10.22,
"step": 55,
"token_acc": 0.8491620111731844,
"train_speed(iter/s)": 0.666067
},
{
"epoch": 0.3448275862068966,
"grad_norm": 4.884017101989966,
"learning_rate": 1.6631226582407954e-05,
"loss": 0.4963292121887207,
"memory(GiB)": 10.22,
"step": 60,
"token_acc": 0.8435754189944135,
"train_speed(iter/s)": 0.676319
},
{
"epoch": 0.3735632183908046,
"grad_norm": 5.325652389779561,
"learning_rate": 1.584522029609889e-05,
"loss": 0.5059492111206054,
"memory(GiB)": 10.22,
"step": 65,
"token_acc": 0.8379888268156425,
"train_speed(iter/s)": 0.685075
},
{
"epoch": 0.40229885057471265,
"grad_norm": 5.744137888836345,
"learning_rate": 1.5000000000000002e-05,
"loss": 0.48285999298095705,
"memory(GiB)": 10.22,
"step": 70,
"token_acc": 0.8277777777777777,
"train_speed(iter/s)": 0.692846
},
{
"epoch": 0.43103448275862066,
"grad_norm": 5.1841000179766406,
"learning_rate": 1.410412805452757e-05,
"loss": 0.44365577697753905,
"memory(GiB)": 10.22,
"step": 75,
"token_acc": 0.8333333333333334,
"train_speed(iter/s)": 0.70009
},
{
"epoch": 0.45977011494252873,
"grad_norm": 5.4044860823804015,
"learning_rate": 1.3166679938014728e-05,
"loss": 0.44077281951904296,
"memory(GiB)": 10.22,
"step": 80,
"token_acc": 0.8547486033519553,
"train_speed(iter/s)": 0.706774
},
{
"epoch": 0.4885057471264368,
"grad_norm": 3.814192170889284,
"learning_rate": 1.2197152309122173e-05,
"loss": 0.41083593368530275,
"memory(GiB)": 10.22,
"step": 85,
"token_acc": 0.8777777777777778,
"train_speed(iter/s)": 0.712107
},
{
"epoch": 0.5172413793103449,
"grad_norm": 6.230484574533733,
"learning_rate": 1.1205366802553231e-05,
"loss": 0.4023772716522217,
"memory(GiB)": 10.22,
"step": 90,
"token_acc": 0.8491620111731844,
"train_speed(iter/s)": 0.716314
},
{
"epoch": 0.5459770114942529,
"grad_norm": 5.5433724273249965,
"learning_rate": 1.0201370532654404e-05,
"loss": 0.3939186096191406,
"memory(GiB)": 10.22,
"step": 95,
"token_acc": 0.8715083798882681,
"train_speed(iter/s)": 0.720953
},
{
"epoch": 0.5747126436781609,
"grad_norm": 4.744446848913436,
"learning_rate": 9.195334312832742e-06,
"loss": 0.36634814739227295,
"memory(GiB)": 10.22,
"step": 100,
"token_acc": 0.8944444444444445,
"train_speed(iter/s)": 0.725326
},
{
"epoch": 0.5747126436781609,
"eval_loss": 3.085969924926758,
"eval_runtime": 8.8084,
"eval_samples_per_second": 116.253,
"eval_steps_per_second": 2.498,
"eval_token_acc": 0.7783402536829532,
"step": 100
},
{
"epoch": 0.603448275862069,
"grad_norm": 3.926014775478041,
"learning_rate": 8.197449621860944e-06,
"loss": 0.35011539459228513,
"memory(GiB)": 15.92,
"step": 105,
"token_acc": 0.7667682926829268,
"train_speed(iter/s)": 0.678189
},
{
"epoch": 0.632183908045977,
"grad_norm": 4.246402025826811,
"learning_rate": 7.217825360835475e-06,
"loss": 0.33618762493133547,
"memory(GiB)": 15.92,
"step": 110,
"token_acc": 0.8659217877094972,
"train_speed(iter/s)": 0.677226
},
{
"epoch": 0.6609195402298851,
"grad_norm": 4.310168084132965,
"learning_rate": 6.266385446673791e-06,
"loss": 0.31383814811706545,
"memory(GiB)": 15.92,
"step": 115,
"token_acc": 0.9055555555555556,
"train_speed(iter/s)": 0.67988
},
{
"epoch": 0.6896551724137931,
"grad_norm": 4.774968229122785,
"learning_rate": 5.352768279562315e-06,
"loss": 0.2903557300567627,
"memory(GiB)": 15.92,
"step": 120,
"token_acc": 0.9,
"train_speed(iter/s)": 0.684505
},
{
"epoch": 0.7183908045977011,
"grad_norm": 4.0187528651074835,
"learning_rate": 4.486229102783084e-06,
"loss": 0.2903176784515381,
"memory(GiB)": 15.92,
"step": 125,
"token_acc": 0.888268156424581,
"train_speed(iter/s)": 0.688844
},
{
"epoch": 0.7471264367816092,
"grad_norm": 4.23665356199129,
"learning_rate": 3.6755462440462288e-06,
"loss": 0.25630669593811034,
"memory(GiB)": 15.92,
"step": 130,
"token_acc": 0.9111111111111111,
"train_speed(iter/s)": 0.693005
},
{
"epoch": 0.7758620689655172,
"grad_norm": 5.164302726059258,
"learning_rate": 2.9289321881345257e-06,
"loss": 0.26200544834136963,
"memory(GiB)": 15.92,
"step": 135,
"token_acc": 0.8888888888888888,
"train_speed(iter/s)": 0.696714
},
{
"epoch": 0.8045977011494253,
"grad_norm": 4.025947370412893,
"learning_rate": 2.2539503817234553e-06,
"loss": 0.24404802322387695,
"memory(GiB)": 15.92,
"step": 140,
"token_acc": 0.9222222222222223,
"train_speed(iter/s)": 0.700315
},
{
"epoch": 0.8333333333333334,
"grad_norm": 4.029452854747659,
"learning_rate": 1.6574386131713872e-06,
"loss": 0.2302267074584961,
"memory(GiB)": 15.92,
"step": 145,
"token_acc": 0.9,
"train_speed(iter/s)": 0.703785
},
{
"epoch": 0.8620689655172413,
"grad_norm": 4.111326617944181,
"learning_rate": 1.1454397434679022e-06,
"loss": 0.21495592594146729,
"memory(GiB)": 15.92,
"step": 150,
"token_acc": 0.9333333333333333,
"train_speed(iter/s)": 0.706914
},
{
"epoch": 0.8908045977011494,
"grad_norm": 3.6705370041668646,
"learning_rate": 7.231404900585714e-07,
"loss": 0.21858932971954345,
"memory(GiB)": 15.92,
"step": 155,
"token_acc": 0.88268156424581,
"train_speed(iter/s)": 0.709691
},
{
"epoch": 0.9195402298850575,
"grad_norm": 3.4777493199480354,
"learning_rate": 3.9481888368627764e-07,
"loss": 0.21195197105407715,
"memory(GiB)": 15.92,
"step": 160,
"token_acc": 0.9162011173184358,
"train_speed(iter/s)": 0.712557
},
{
"epoch": 0.9482758620689655,
"grad_norm": 3.8081053430996246,
"learning_rate": 1.6380093052856482e-07,
"loss": 0.20168027877807618,
"memory(GiB)": 15.92,
"step": 165,
"token_acc": 0.9277777777777778,
"train_speed(iter/s)": 0.715106
},
{
"epoch": 0.9770114942528736,
"grad_norm": 3.75099140436243,
"learning_rate": 3.242691865790071e-08,
"loss": 0.2000497817993164,
"memory(GiB)": 15.92,
"step": 170,
"token_acc": 0.8888888888888888,
"train_speed(iter/s)": 0.716937
},
{
"epoch": 1.0,
"eval_loss": 3.787536144256592,
"eval_runtime": 5.2095,
"eval_samples_per_second": 196.563,
"eval_steps_per_second": 4.223,
"eval_token_acc": 0.7779989761674535,
"step": 174
}
],
"logging_steps": 5,
"max_steps": 174,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 220,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.470457873998807e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}