mjaliz
/

bge-retrmoae-27M-e4

Model card Files Files and versions

bge-retrmoae-27M-e4 / trainer_state.json

mjaliz's picture

Upload folder using huggingface_hub

1d1d48b verified 5 months ago

history blame contribute delete

3.3 kB

	{
	"best_global_step": 847768,
	"best_metric": 0.9335971474647522,
	"best_model_checkpoint": "./mjaliz/product_titles_27M_bge-m3-retromae/checkpoint-847768",
	"epoch": 4.0,
	"eval_steps": 500,
	"global_step": 847768,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.4718271980070019,
	"grad_norm": 4.73534631729126,
	"learning_rate": 1.8129818363857907e-05,
	"loss": 1.4995,
	"step": 100000
	},
	{
	"epoch": 0.9436543960140038,
	"grad_norm": 4.425976276397705,
	"learning_rate": 1.6240726922386682e-05,
	"loss": 1.2511,
	"step": 200000
	},
	{
	"epoch": 1.0,
	"eval_loss": 1.126031756401062,
	"eval_runtime": 8694.616,
	"eval_samples_per_second": 650.033,
	"eval_steps_per_second": 6.5,
	"step": 211942
	},
	{
	"epoch": 1.4154815940210057,
	"grad_norm": 4.945573806762695,
	"learning_rate": 1.4351635480915455e-05,
	"loss": 1.1637,
	"step": 300000
	},
	{
	"epoch": 1.8873087920280076,
	"grad_norm": 4.503483772277832,
	"learning_rate": 1.246254403944423e-05,
	"loss": 1.1104,
	"step": 400000
	},
	{
	"epoch": 2.0,
	"eval_loss": 1.0256075859069824,
	"eval_runtime": 8677.2104,
	"eval_samples_per_second": 651.337,
	"eval_steps_per_second": 6.513,
	"step": 423884
	},
	{
	"epoch": 2.3591359900350097,
	"grad_norm": 3.8631985187530518,
	"learning_rate": 1.0573452597973006e-05,
	"loss": 1.0708,
	"step": 500000
	},
	{
	"epoch": 2.8309631880420114,
	"grad_norm": 4.198337078094482,
	"learning_rate": 8.684361156501781e-06,
	"loss": 1.0412,
	"step": 600000
	},
	{
	"epoch": 3.0,
	"eval_loss": 0.9685180187225342,
	"eval_runtime": 8703.9306,
	"eval_samples_per_second": 649.338,
	"eval_steps_per_second": 6.493,
	"step": 635826
	},
	{
	"epoch": 3.3027903860490135,
	"grad_norm": 3.789280414581299,
	"learning_rate": 6.795269715030557e-06,
	"loss": 1.0148,
	"step": 700000
	},
	{
	"epoch": 3.774617584056015,
	"grad_norm": 3.9477617740631104,
	"learning_rate": 4.906178273559332e-06,
	"loss": 0.9943,
	"step": 800000
	},
	{
	"epoch": 4.0,
	"eval_loss": 0.9335971474647522,
	"eval_runtime": 8688.2062,
	"eval_samples_per_second": 650.513,
	"eval_steps_per_second": 6.505,
	"step": 847768
	}
	],
	"logging_steps": 100000,
	"max_steps": 1059710,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5,
	"save_steps": 500,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 5,
	"early_stopping_threshold": 0.01
	},
	"attributes": {
	"early_stopping_patience_counter": 0
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 3.953539160600081e+19,
	"train_batch_size": 100,
	"trial_name": null,
	"trial_params": null
	}