hin2kan_model / checkpoint-20000 /trainer_state.json

Add files using upload-large-folder tool

3c2307e verified 9 months ago

7.74 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 5.339028296849973,
	"eval_steps": 500,
	"global_step": 20000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.13347570742124934,
	"grad_norm": 3.1733596324920654,
	"learning_rate": 4.933395621996797e-05,
	"loss": 1.4561,
	"step": 500
	},
	{
	"epoch": 0.2669514148424987,
	"grad_norm": 2.9853200912475586,
	"learning_rate": 4.866657768286172e-05,
	"loss": 1.2371,
	"step": 1000
	},
	{
	"epoch": 0.400427122263748,
	"grad_norm": 2.6459007263183594,
	"learning_rate": 4.7999199145755475e-05,
	"loss": 1.1388,
	"step": 1500
	},
	{
	"epoch": 0.5339028296849974,
	"grad_norm": 2.6050009727478027,
	"learning_rate": 4.733182060864923e-05,
	"loss": 1.1141,
	"step": 2000
	},
	{
	"epoch": 0.6673785371062466,
	"grad_norm": 2.1738457679748535,
	"learning_rate": 4.666444207154298e-05,
	"loss": 1.086,
	"step": 2500
	},
	{
	"epoch": 0.800854244527496,
	"grad_norm": 2.692978620529175,
	"learning_rate": 4.599706353443674e-05,
	"loss": 1.0676,
	"step": 3000
	},
	{
	"epoch": 0.9343299519487454,
	"grad_norm": 2.072795867919922,
	"learning_rate": 4.532968499733049e-05,
	"loss": 0.9895,
	"step": 3500
	},
	{
	"epoch": 1.0678056593699947,
	"grad_norm": 2.18379282951355,
	"learning_rate": 4.466230646022424e-05,
	"loss": 0.9155,
	"step": 4000
	},
	{
	"epoch": 1.201281366791244,
	"grad_norm": 2.4531776905059814,
	"learning_rate": 4.3994927923117995e-05,
	"loss": 0.8136,
	"step": 4500
	},
	{
	"epoch": 1.3347570742124932,
	"grad_norm": 2.8264808654785156,
	"learning_rate": 4.332754938601175e-05,
	"loss": 0.8129,
	"step": 5000
	},
	{
	"epoch": 1.4682327816337426,
	"grad_norm": 1.746168851852417,
	"learning_rate": 4.26601708489055e-05,
	"loss": 0.8581,
	"step": 5500
	},
	{
	"epoch": 1.601708489054992,
	"grad_norm": 2.722280740737915,
	"learning_rate": 4.199279231179926e-05,
	"loss": 0.7905,
	"step": 6000
	},
	{
	"epoch": 1.7351841964762413,
	"grad_norm": 2.4873287677764893,
	"learning_rate": 4.1325413774693004e-05,
	"loss": 0.8327,
	"step": 6500
	},
	{
	"epoch": 1.8686599038974907,
	"grad_norm": 2.6175665855407715,
	"learning_rate": 4.0658035237586763e-05,
	"loss": 0.8191,
	"step": 7000
	},
	{
	"epoch": 2.00213561131874,
	"grad_norm": 1.2910939455032349,
	"learning_rate": 3.9990656700480516e-05,
	"loss": 0.8359,
	"step": 7500
	},
	{
	"epoch": 2.1356113187399894,
	"grad_norm": 2.5565571784973145,
	"learning_rate": 3.932327816337427e-05,
	"loss": 0.6788,
	"step": 8000
	},
	{
	"epoch": 2.269087026161239,
	"grad_norm": 2.173668146133423,
	"learning_rate": 3.865589962626802e-05,
	"loss": 0.6669,
	"step": 8500
	},
	{
	"epoch": 2.402562733582488,
	"grad_norm": 1.9133882522583008,
	"learning_rate": 3.798852108916178e-05,
	"loss": 0.6696,
	"step": 9000
	},
	{
	"epoch": 2.536038441003737,
	"grad_norm": 1.3470282554626465,
	"learning_rate": 3.7321142552055525e-05,
	"loss": 0.6803,
	"step": 9500
	},
	{
	"epoch": 2.6695141484249865,
	"grad_norm": 2.3730781078338623,
	"learning_rate": 3.6653764014949284e-05,
	"loss": 0.6684,
	"step": 10000
	},
	{
	"epoch": 2.802989855846236,
	"grad_norm": 2.106994390487671,
	"learning_rate": 3.598638547784303e-05,
	"loss": 0.6717,
	"step": 10500
	},
	{
	"epoch": 2.936465563267485,
	"grad_norm": 1.7302494049072266,
	"learning_rate": 3.531900694073679e-05,
	"loss": 0.676,
	"step": 11000
	},
	{
	"epoch": 3.0699412706887346,
	"grad_norm": 1.477286458015442,
	"learning_rate": 3.465162840363054e-05,
	"loss": 0.586,
	"step": 11500
	},
	{
	"epoch": 3.203416978109984,
	"grad_norm": 1.818613052368164,
	"learning_rate": 3.398424986652429e-05,
	"loss": 0.5467,
	"step": 12000
	},
	{
	"epoch": 3.3368926855312333,
	"grad_norm": 1.6314208507537842,
	"learning_rate": 3.3316871329418045e-05,
	"loss": 0.556,
	"step": 12500
	},
	{
	"epoch": 3.4703683929524827,
	"grad_norm": 2.8924617767333984,
	"learning_rate": 3.2649492792311804e-05,
	"loss": 0.5567,
	"step": 13000
	},
	{
	"epoch": 3.603844100373732,
	"grad_norm": 2.6945688724517822,
	"learning_rate": 3.198211425520555e-05,
	"loss": 0.5568,
	"step": 13500
	},
	{
	"epoch": 3.7373198077949814,
	"grad_norm": 2.092221736907959,
	"learning_rate": 3.131473571809931e-05,
	"loss": 0.5567,
	"step": 14000
	},
	{
	"epoch": 3.8707955152162308,
	"grad_norm": 1.6795735359191895,
	"learning_rate": 3.064735718099306e-05,
	"loss": 0.5764,
	"step": 14500
	},
	{
	"epoch": 4.00427122263748,
	"grad_norm": 2.4606454372406006,
	"learning_rate": 2.9979978643886814e-05,
	"loss": 0.5716,
	"step": 15000
	},
	{
	"epoch": 4.1377469300587295,
	"grad_norm": 4.759591102600098,
	"learning_rate": 2.931260010678057e-05,
	"loss": 0.4671,
	"step": 15500
	},
	{
	"epoch": 4.271222637479979,
	"grad_norm": 1.4791502952575684,
	"learning_rate": 2.8645221569674318e-05,
	"loss": 0.4719,
	"step": 16000
	},
	{
	"epoch": 4.404698344901228,
	"grad_norm": 1.2884821891784668,
	"learning_rate": 2.7977843032568074e-05,
	"loss": 0.465,
	"step": 16500
	},
	{
	"epoch": 4.538174052322478,
	"grad_norm": 3.4914660453796387,
	"learning_rate": 2.731046449546183e-05,
	"loss": 0.4572,
	"step": 17000
	},
	{
	"epoch": 4.671649759743727,
	"grad_norm": 1.9152294397354126,
	"learning_rate": 2.664308595835558e-05,
	"loss": 0.4685,
	"step": 17500
	},
	{
	"epoch": 4.805125467164976,
	"grad_norm": 2.193741798400879,
	"learning_rate": 2.5975707421249334e-05,
	"loss": 0.4751,
	"step": 18000
	},
	{
	"epoch": 4.938601174586225,
	"grad_norm": 1.8435180187225342,
	"learning_rate": 2.530832888414309e-05,
	"loss": 0.4671,
	"step": 18500
	},
	{
	"epoch": 5.072076882007474,
	"grad_norm": 2.184936761856079,
	"learning_rate": 2.464095034703684e-05,
	"loss": 0.4342,
	"step": 19000
	},
	{
	"epoch": 5.205552589428724,
	"grad_norm": 1.9590941667556763,
	"learning_rate": 2.3973571809930594e-05,
	"loss": 0.3865,
	"step": 19500
	},
	{
	"epoch": 5.339028296849973,
	"grad_norm": 2.4979445934295654,
	"learning_rate": 2.3306193272824347e-05,
	"loss": 0.3983,
	"step": 20000
	}
	],
	"logging_steps": 500,
	"max_steps": 37460,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 10,
	"save_steps": 10000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 6911108207443968.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}