timaeus
/

tetra-ind-w15k-20k

Model card Files Files and versions

tetra-ind-w15k-20k / trainer_state.json

bfpill's picture

Upload tokenizer & config to main

9b0e057 verified 3 months ago

history blame contribute delete

4.08 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.47340779643464753,
	"eval_steps": 100,
	"global_step": 2000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.00023670389821732376,
	"grad_norm": 2.5553174018859863,
	"learning_rate": 0.001,
	"loss": 10.9944,
	"step": 1
	},
	{
	"epoch": 0.023670389821732377,
	"grad_norm": 0.545368492603302,
	"learning_rate": 0.001,
	"loss": 7.7121,
	"step": 100
	},
	{
	"epoch": 0.04734077964346475,
	"grad_norm": 0.528997540473938,
	"learning_rate": 0.001,
	"loss": 6.3787,
	"step": 200
	},
	{
	"epoch": 0.07101116946519713,
	"grad_norm": 1.7439062595367432,
	"learning_rate": 0.001,
	"loss": 5.9739,
	"step": 300
	},
	{
	"epoch": 0.0946815592869295,
	"grad_norm": 1.6054385900497437,
	"learning_rate": 0.001,
	"loss": 5.7237,
	"step": 400
	},
	{
	"epoch": 0.11835194910866188,
	"grad_norm": 1.6123534440994263,
	"learning_rate": 0.001,
	"loss": 5.5599,
	"step": 500
	},
	{
	"epoch": 0.14202233893039426,
	"grad_norm": 1.2416802644729614,
	"learning_rate": 0.001,
	"loss": 5.4246,
	"step": 600
	},
	{
	"epoch": 0.16569272875212665,
	"grad_norm": 0.967766523361206,
	"learning_rate": 0.001,
	"loss": 5.3145,
	"step": 700
	},
	{
	"epoch": 0.189363118573859,
	"grad_norm": 1.3783754110336304,
	"learning_rate": 0.001,
	"loss": 5.2298,
	"step": 800
	},
	{
	"epoch": 0.2130335083955914,
	"grad_norm": 1.336877703666687,
	"learning_rate": 0.001,
	"loss": 5.1636,
	"step": 900
	},
	{
	"epoch": 0.23670389821732377,
	"grad_norm": 1.2909138202667236,
	"learning_rate": 0.001,
	"loss": 5.1127,
	"step": 1000
	},
	{
	"epoch": 0.26037428803905616,
	"grad_norm": 0.715796172618866,
	"learning_rate": 0.001,
	"loss": 5.0768,
	"step": 1100
	},
	{
	"epoch": 0.2840446778607885,
	"grad_norm": 1.2406651973724365,
	"learning_rate": 0.001,
	"loss": 5.0397,
	"step": 1200
	},
	{
	"epoch": 0.3077150676825209,
	"grad_norm": 1.1353099346160889,
	"learning_rate": 0.001,
	"loss": 5.0067,
	"step": 1300
	},
	{
	"epoch": 0.3313854575042533,
	"grad_norm": 0.5953907370567322,
	"learning_rate": 0.001,
	"loss": 4.9782,
	"step": 1400
	},
	{
	"epoch": 0.35505584732598566,
	"grad_norm": 1.2066272497177124,
	"learning_rate": 0.001,
	"loss": 4.955,
	"step": 1500
	},
	{
	"epoch": 0.378726237147718,
	"grad_norm": 1.0147947072982788,
	"learning_rate": 0.001,
	"loss": 4.937,
	"step": 1600
	},
	{
	"epoch": 0.4023966269694504,
	"grad_norm": 1.634466290473938,
	"learning_rate": 0.001,
	"loss": 4.9147,
	"step": 1700
	},
	{
	"epoch": 0.4260670167911828,
	"grad_norm": 1.3447681665420532,
	"learning_rate": 0.001,
	"loss": 4.9013,
	"step": 1800
	},
	{
	"epoch": 0.44973740661291517,
	"grad_norm": 1.3308050632476807,
	"learning_rate": 0.001,
	"loss": 4.8883,
	"step": 1900
	},
	{
	"epoch": 0.47340779643464753,
	"grad_norm": 1.0306942462921143,
	"learning_rate": 0.001,
	"loss": 4.8728,
	"step": 2000
	}
	],
	"logging_steps": 100,
	"max_steps": 40000,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 10,
	"save_steps": 2000,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 8.2659866836992e+16,
	"train_batch_size": 64,
	"trial_name": null,
	"trial_params": null
	}