Upload folder using huggingface_hub

dacfd03 verified about 2 months ago

4.95 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.9961315280464218,
	"eval_steps": 500,
	"global_step": 258,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.07736943907156674,
	"grad_norm": 0.7897351384162903,
	"learning_rate": 2e-05,
	"loss": 1.7966,
	"step": 10
	},
	{
	"epoch": 0.15473887814313347,
	"grad_norm": 0.546701192855835,
	"learning_rate": 4e-05,
	"loss": 1.6821,
	"step": 20
	},
	{
	"epoch": 0.23210831721470018,
	"grad_norm": 0.24719476699829102,
	"learning_rate": 6e-05,
	"loss": 1.4684,
	"step": 30
	},
	{
	"epoch": 0.30947775628626695,
	"grad_norm": 0.19408376514911652,
	"learning_rate": 8e-05,
	"loss": 1.3593,
	"step": 40
	},
	{
	"epoch": 0.38684719535783363,
	"grad_norm": 0.18511685729026794,
	"learning_rate": 0.0001,
	"loss": 1.2809,
	"step": 50
	},
	{
	"epoch": 0.46421663442940037,
	"grad_norm": 0.18041056394577026,
	"learning_rate": 0.00012,
	"loss": 1.1822,
	"step": 60
	},
	{
	"epoch": 0.5415860735009671,
	"grad_norm": 0.18419483304023743,
	"learning_rate": 0.00014,
	"loss": 1.0731,
	"step": 70
	},
	{
	"epoch": 0.6189555125725339,
	"grad_norm": 0.22910986840724945,
	"learning_rate": 0.00016,
	"loss": 1.0346,
	"step": 80
	},
	{
	"epoch": 0.6963249516441006,
	"grad_norm": 0.20705537497997284,
	"learning_rate": 0.00018,
	"loss": 0.983,
	"step": 90
	},
	{
	"epoch": 0.7736943907156673,
	"grad_norm": 0.22625038027763367,
	"learning_rate": 0.0002,
	"loss": 0.9447,
	"step": 100
	},
	{
	"epoch": 0.851063829787234,
	"grad_norm": 0.23730379343032837,
	"learning_rate": 0.00019802973668046363,
	"loss": 0.9222,
	"step": 110
	},
	{
	"epoch": 0.9284332688588007,
	"grad_norm": 0.20882545411586761,
	"learning_rate": 0.00019219658547282067,
	"loss": 0.9112,
	"step": 120
	},
	{
	"epoch": 1.0058027079303675,
	"grad_norm": 0.240465447306633,
	"learning_rate": 0.00018273040325430574,
	"loss": 0.903,
	"step": 130
	},
	{
	"epoch": 1.0831721470019342,
	"grad_norm": 0.2476571947336197,
	"learning_rate": 0.00017000420745694254,
	"loss": 0.8574,
	"step": 140
	},
	{
	"epoch": 1.1605415860735009,
	"grad_norm": 0.22345130145549774,
	"learning_rate": 0.00015451947721626676,
	"loss": 0.8703,
	"step": 150
	},
	{
	"epoch": 1.2379110251450678,
	"grad_norm": 0.23553310334682465,
	"learning_rate": 0.00013688639245240078,
	"loss": 0.8414,
	"step": 160
	},
	{
	"epoch": 1.3152804642166345,
	"grad_norm": 0.2857750952243805,
	"learning_rate": 0.00011779978956775506,
	"loss": 0.8407,
	"step": 170
	},
	{
	"epoch": 1.3926499032882012,
	"grad_norm": 0.24604524672031403,
	"learning_rate": 9.801178123349298e-05,
	"loss": 0.8429,
	"step": 180
	},
	{
	"epoch": 1.4700193423597678,
	"grad_norm": 0.2211475670337677,
	"learning_rate": 7.83021191893682e-05,
	"loss": 0.8352,
	"step": 190
	},
	{
	"epoch": 1.5473887814313345,
	"grad_norm": 0.23726575076580048,
	"learning_rate": 5.9447467918700614e-05,
	"loss": 0.819,
	"step": 200
	},
	{
	"epoch": 1.6247582205029012,
	"grad_norm": 0.2289407104253769,
	"learning_rate": 4.219079997751515e-05,
	"loss": 0.8318,
	"step": 210
	},
	{
	"epoch": 1.702127659574468,
	"grad_norm": 0.24488922953605652,
	"learning_rate": 2.7212118963050592e-05,
	"loss": 0.8544,
	"step": 220
	},
	{
	"epoch": 1.7794970986460348,
	"grad_norm": 0.2198326289653778,
	"learning_rate": 1.5101663790863596e-05,
	"loss": 0.8344,
	"step": 230
	},
	{
	"epoch": 1.8568665377176017,
	"grad_norm": 0.23588140308856964,
	"learning_rate": 6.336650173127223e-06,
	"loss": 0.859,
	"step": 240
	},
	{
	"epoch": 1.9342359767891684,
	"grad_norm": 0.22782348096370697,
	"learning_rate": 1.2624658063666639e-06,
	"loss": 0.7938,
	"step": 250
	}
	],
	"logging_steps": 10,
	"max_steps": 258,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.590811292060877e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}