{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4708236471176765,
  "eval_steps": 500,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005885295588970956,
      "grad_norm": 29.875,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 1.0686,
      "step": 100
    },
    {
      "epoch": 0.011770591177941912,
      "grad_norm": 11.375,
      "learning_rate": 7.843137254901961e-05,
      "loss": 0.6153,
      "step": 200
    },
    {
      "epoch": 0.01765588676691287,
      "grad_norm": 20.25,
      "learning_rate": 0.00011764705882352942,
      "loss": 0.5969,
      "step": 300
    },
    {
      "epoch": 0.023541182355883823,
      "grad_norm": 4.5,
      "learning_rate": 0.00015686274509803922,
      "loss": 0.5478,
      "step": 400
    },
    {
      "epoch": 0.02942647794485478,
      "grad_norm": 7.40625,
      "learning_rate": 0.000196078431372549,
      "loss": 0.5599,
      "step": 500
    },
    {
      "epoch": 0.03531177353382574,
      "grad_norm": 2.71875,
      "learning_rate": 0.00019998528443307886,
      "loss": 0.565,
      "step": 600
    },
    {
      "epoch": 0.04119706912279669,
      "grad_norm": 30.875,
      "learning_rate": 0.00019993442136695625,
      "loss": 0.5501,
      "step": 700
    },
    {
      "epoch": 0.047082364711767646,
      "grad_norm": 3.328125,
      "learning_rate": 0.00019984724760441856,
      "loss": 0.5355,
      "step": 800
    },
    {
      "epoch": 0.05296766030073861,
      "grad_norm": 6.03125,
      "learning_rate": 0.00019972379481963764,
      "loss": 0.5344,
      "step": 900
    },
    {
      "epoch": 0.05885295588970956,
      "grad_norm": 29.75,
      "learning_rate": 0.00019956410786859524,
      "loss": 0.5016,
      "step": 1000
    },
    {
      "epoch": 0.06473825147868052,
      "grad_norm": 14.0625,
      "learning_rate": 0.00019936824477278514,
      "loss": 0.5091,
      "step": 1100
    },
    {
      "epoch": 0.07062354706765148,
      "grad_norm": 24.75,
      "learning_rate": 0.00019913627669813103,
      "loss": 0.5005,
      "step": 1200
    },
    {
      "epoch": 0.07650884265662243,
      "grad_norm": 3.203125,
      "learning_rate": 0.00019886828792912894,
      "loss": 0.4961,
      "step": 1300
    },
    {
      "epoch": 0.08239413824559338,
      "grad_norm": 1.875,
      "learning_rate": 0.0001985643758382227,
      "loss": 0.4755,
      "step": 1400
    },
    {
      "epoch": 0.08827943383456434,
      "grad_norm": 7.46875,
      "learning_rate": 0.00019822465085042422,
      "loss": 0.4889,
      "step": 1500
    },
    {
      "epoch": 0.09416472942353529,
      "grad_norm": 3.859375,
      "learning_rate": 0.0001978492364031911,
      "loss": 0.5024,
      "step": 1600
    },
    {
      "epoch": 0.10005002501250625,
      "grad_norm": 14.4375,
      "learning_rate": 0.00019743826890157614,
      "loss": 0.4681,
      "step": 1700
    },
    {
      "epoch": 0.10593532060147721,
      "grad_norm": 10.375,
      "learning_rate": 0.0001969918976686652,
      "loss": 0.488,
      "step": 1800
    },
    {
      "epoch": 0.11182061619044817,
      "grad_norm": 9.5625,
      "learning_rate": 0.00019651028489132147,
      "loss": 0.4859,
      "step": 1900
    },
    {
      "epoch": 0.11770591177941912,
      "grad_norm": 15.125,
      "learning_rate": 0.0001959936055612557,
      "loss": 0.5028,
      "step": 2000
    },
    {
      "epoch": 0.12359120736839008,
      "grad_norm": 12.5625,
      "learning_rate": 0.0001954420474114435,
      "loss": 0.4937,
      "step": 2100
    },
    {
      "epoch": 0.12947650295736104,
      "grad_norm": 3.890625,
      "learning_rate": 0.00019485581084791376,
      "loss": 0.4801,
      "step": 2200
    },
    {
      "epoch": 0.13536179854633199,
      "grad_norm": 19.125,
      "learning_rate": 0.0001942351088769319,
      "loss": 0.4853,
      "step": 2300
    },
    {
      "epoch": 0.14124709413530295,
      "grad_norm": 11.8125,
      "learning_rate": 0.0001935801670276052,
      "loss": 0.4739,
      "step": 2400
    },
    {
      "epoch": 0.1471323897242739,
      "grad_norm": 35.5,
      "learning_rate": 0.00019289122326993777,
      "loss": 0.4868,
      "step": 2500
    },
    {
      "epoch": 0.15301768531324486,
      "grad_norm": 20.875,
      "learning_rate": 0.00019216852792836516,
      "loss": 0.4925,
      "step": 2600
    },
    {
      "epoch": 0.1589029809022158,
      "grad_norm": 12.5625,
      "learning_rate": 0.00019141234359080055,
      "loss": 0.4808,
      "step": 2700
    },
    {
      "epoch": 0.16478827649118677,
      "grad_norm": 8.6875,
      "learning_rate": 0.00019062294501322416,
      "loss": 0.4757,
      "step": 2800
    },
    {
      "epoch": 0.17067357208015774,
      "grad_norm": 20.625,
      "learning_rate": 0.0001898006190198525,
      "loss": 0.4805,
      "step": 2900
    },
    {
      "epoch": 0.17655886766912868,
      "grad_norm": 10.25,
      "learning_rate": 0.0001889456643989218,
      "loss": 0.4832,
      "step": 3000
    },
    {
      "epoch": 0.18244416325809965,
      "grad_norm": 20.25,
      "learning_rate": 0.00018805839179412485,
      "loss": 0.4559,
      "step": 3100
    },
    {
      "epoch": 0.18832945884707059,
      "grad_norm": 8.5625,
      "learning_rate": 0.00018713912359174,
      "loss": 0.497,
      "step": 3200
    },
    {
      "epoch": 0.19421475443604155,
      "grad_norm": 6.40625,
      "learning_rate": 0.00018618819380349382,
      "loss": 0.4776,
      "step": 3300
    },
    {
      "epoch": 0.2001000500250125,
      "grad_norm": 12.8125,
      "learning_rate": 0.00018520594794519941,
      "loss": 0.4915,
      "step": 3400
    },
    {
      "epoch": 0.20598534561398346,
      "grad_norm": 1.84375,
      "learning_rate": 0.00018419274291121485,
      "loss": 0.4498,
      "step": 3500
    },
    {
      "epoch": 0.21187064120295443,
      "grad_norm": 3.8125,
      "learning_rate": 0.00018314894684476736,
      "loss": 0.4625,
      "step": 3600
    },
    {
      "epoch": 0.21775593679192537,
      "grad_norm": 19.125,
      "learning_rate": 0.00018207493900419027,
      "loss": 0.4625,
      "step": 3700
    },
    {
      "epoch": 0.22364123238089634,
      "grad_norm": 11.5,
      "learning_rate": 0.00018097110962512128,
      "loss": 0.4655,
      "step": 3800
    },
    {
      "epoch": 0.22952652796986728,
      "grad_norm": 6.3125,
      "learning_rate": 0.00017983785977871209,
      "loss": 0.4488,
      "step": 3900
    },
    {
      "epoch": 0.23541182355883825,
      "grad_norm": 9.875,
      "learning_rate": 0.00017867560122590125,
      "loss": 0.4441,
      "step": 4000
    },
    {
      "epoch": 0.24129711914780919,
      "grad_norm": 12.875,
      "learning_rate": 0.00017748475626780277,
      "loss": 0.4732,
      "step": 4100
    },
    {
      "epoch": 0.24718241473678015,
      "grad_norm": 4.21875,
      "learning_rate": 0.0001762657575922649,
      "loss": 0.4544,
      "step": 4200
    },
    {
      "epoch": 0.2530677103257511,
      "grad_norm": 3.125,
      "learning_rate": 0.0001750190481166552,
      "loss": 0.4779,
      "step": 4300
    },
    {
      "epoch": 0.2589530059147221,
      "grad_norm": 2.1875,
      "learning_rate": 0.00017374508082692848,
      "loss": 0.4661,
      "step": 4400
    },
    {
      "epoch": 0.26483830150369303,
      "grad_norm": 26.25,
      "learning_rate": 0.0001724443186130367,
      "loss": 0.4916,
      "step": 4500
    },
    {
      "epoch": 0.27072359709266397,
      "grad_norm": 8.125,
      "learning_rate": 0.00017111723410073991,
      "loss": 0.449,
      "step": 4600
    },
    {
      "epoch": 0.2766088926816349,
      "grad_norm": 8.625,
      "learning_rate": 0.00016976430947988007,
      "loss": 0.45,
      "step": 4700
    },
    {
      "epoch": 0.2824941882706059,
      "grad_norm": 3.59375,
      "learning_rate": 0.00016838603632917954,
      "loss": 0.4593,
      "step": 4800
    },
    {
      "epoch": 0.28837948385957685,
      "grad_norm": 6.40625,
      "learning_rate": 0.0001669829154376285,
      "loss": 0.4847,
      "step": 4900
    },
    {
      "epoch": 0.2942647794485478,
      "grad_norm": 13.125,
      "learning_rate": 0.00016555545662252536,
      "loss": 0.4576,
      "step": 5000
    },
    {
      "epoch": 0.3001500750375188,
      "grad_norm": 14.3125,
      "learning_rate": 0.00016410417854423735,
      "loss": 0.4457,
      "step": 5100
    },
    {
      "epoch": 0.3060353706264897,
      "grad_norm": 29.0,
      "learning_rate": 0.00016262960851774752,
      "loss": 0.4972,
      "step": 5200
    },
    {
      "epoch": 0.31192066621546066,
      "grad_norm": 20.75,
      "learning_rate": 0.00016113228232105757,
      "loss": 0.4715,
      "step": 5300
    },
    {
      "epoch": 0.3178059618044316,
      "grad_norm": 22.5,
      "learning_rate": 0.0001596127440005152,
      "loss": 0.4696,
      "step": 5400
    },
    {
      "epoch": 0.3236912573934026,
      "grad_norm": 8.1875,
      "learning_rate": 0.00015807154567313775,
      "loss": 0.4629,
      "step": 5500
    },
    {
      "epoch": 0.32957655298237354,
      "grad_norm": 4.375,
      "learning_rate": 0.0001565092473260029,
      "loss": 0.475,
      "step": 5600
    },
    {
      "epoch": 0.3354618485713445,
      "grad_norm": 13.5,
      "learning_rate": 0.00015492641661278005,
      "loss": 0.4511,
      "step": 5700
    },
    {
      "epoch": 0.3413471441603155,
      "grad_norm": 3.5625,
      "learning_rate": 0.0001533236286474762,
      "loss": 0.4743,
      "step": 5800
    },
    {
      "epoch": 0.3472324397492864,
      "grad_norm": 11.8125,
      "learning_rate": 0.0001517014657954708,
      "loss": 0.4418,
      "step": 5900
    },
    {
      "epoch": 0.35311773533825735,
      "grad_norm": 26.125,
      "learning_rate": 0.00015006051746191626,
      "loss": 0.45,
      "step": 6000
    },
    {
      "epoch": 0.3590030309272283,
      "grad_norm": 15.375,
      "learning_rate": 0.00014840137987758028,
      "loss": 0.4463,
      "step": 6100
    },
    {
      "epoch": 0.3648883265161993,
      "grad_norm": 5.90625,
      "learning_rate": 0.00014672465588220837,
      "loss": 0.4559,
      "step": 6200
    },
    {
      "epoch": 0.37077362210517023,
      "grad_norm": 12.9375,
      "learning_rate": 0.0001450309547054846,
      "loss": 0.4398,
      "step": 6300
    },
    {
      "epoch": 0.37665891769414117,
      "grad_norm": 21.875,
      "learning_rate": 0.00014332089174567126,
      "loss": 0.4454,
      "step": 6400
    },
    {
      "epoch": 0.38254421328311217,
      "grad_norm": 16.875,
      "learning_rate": 0.00014159508834600657,
      "loss": 0.4443,
      "step": 6500
    },
    {
      "epoch": 0.3884295088720831,
      "grad_norm": 34.25,
      "learning_rate": 0.00013985417156894267,
      "loss": 0.4762,
      "step": 6600
    },
    {
      "epoch": 0.39431480446105405,
      "grad_norm": 4.5625,
      "learning_rate": 0.0001380987739683055,
      "loss": 0.4795,
      "step": 6700
    },
    {
      "epoch": 0.400200100050025,
      "grad_norm": 15.75,
      "learning_rate": 0.00013632953335945927,
      "loss": 0.4603,
      "step": 6800
    },
    {
      "epoch": 0.406085395638996,
      "grad_norm": 5.40625,
      "learning_rate": 0.00013454709258755942,
      "loss": 0.4674,
      "step": 6900
    },
    {
      "epoch": 0.4119706912279669,
      "grad_norm": 30.125,
      "learning_rate": 0.00013275209929397775,
      "loss": 0.4595,
      "step": 7000
    },
    {
      "epoch": 0.41785598681693786,
      "grad_norm": 16.875,
      "learning_rate": 0.0001309452056809851,
      "loss": 0.4398,
      "step": 7100
    },
    {
      "epoch": 0.42374128240590886,
      "grad_norm": 5.6875,
      "learning_rate": 0.00012912706827477671,
      "loss": 0.4693,
      "step": 7200
    },
    {
      "epoch": 0.4296265779948798,
      "grad_norm": 17.125,
      "learning_rate": 0.00012729834768692667,
      "loss": 0.4564,
      "step": 7300
    },
    {
      "epoch": 0.43551187358385074,
      "grad_norm": 9.75,
      "learning_rate": 0.00012545970837435756,
      "loss": 0.4732,
      "step": 7400
    },
    {
      "epoch": 0.4413971691728217,
      "grad_norm": 6.3125,
      "learning_rate": 0.00012361181839791357,
      "loss": 0.4647,
      "step": 7500
    },
    {
      "epoch": 0.4472824647617927,
      "grad_norm": 19.0,
      "learning_rate": 0.00012175534917962352,
      "loss": 0.4697,
      "step": 7600
    },
    {
      "epoch": 0.4531677603507636,
      "grad_norm": 19.375,
      "learning_rate": 0.00011989097525874294,
      "loss": 0.4814,
      "step": 7700
    },
    {
      "epoch": 0.45905305593973456,
      "grad_norm": 2.015625,
      "learning_rate": 0.00011801937404666336,
      "loss": 0.4688,
      "step": 7800
    },
    {
      "epoch": 0.46493835152870555,
      "grad_norm": 9.625,
      "learning_rate": 0.00011614122558077828,
      "loss": 0.4665,
      "step": 7900
    },
    {
      "epoch": 0.4708236471176765,
      "grad_norm": 21.875,
      "learning_rate": 0.00011425721227739465,
      "loss": 0.472,
      "step": 8000
    }
  ],
  "logging_steps": 100,
  "max_steps": 16991,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 4000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.253476198349144e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}