Upload folder using huggingface_hub

c5cce35 verified about 2 months ago

6.86 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.998642226748133,
	"eval_steps": 500,
	"global_step": 368,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.05431093007467753,
	"grad_norm": 1.0013175010681152,
	"learning_rate": 2e-05,
	"loss": 1.8305,
	"step": 10
	},
	{
	"epoch": 0.10862186014935506,
	"grad_norm": 0.6650476455688477,
	"learning_rate": 4e-05,
	"loss": 1.6895,
	"step": 20
	},
	{
	"epoch": 0.1629327902240326,
	"grad_norm": 0.2262110561132431,
	"learning_rate": 6e-05,
	"loss": 1.4921,
	"step": 30
	},
	{
	"epoch": 0.2172437202987101,
	"grad_norm": 0.22021710872650146,
	"learning_rate": 8e-05,
	"loss": 1.4074,
	"step": 40
	},
	{
	"epoch": 0.27155465037338766,
	"grad_norm": 0.18622460961341858,
	"learning_rate": 0.0001,
	"loss": 1.2593,
	"step": 50
	},
	{
	"epoch": 0.3258655804480652,
	"grad_norm": 0.18928822875022888,
	"learning_rate": 0.00012,
	"loss": 1.1828,
	"step": 60
	},
	{
	"epoch": 0.3801765105227427,
	"grad_norm": 4.061775207519531,
	"learning_rate": 0.00014,
	"loss": 1.0697,
	"step": 70
	},
	{
	"epoch": 0.4344874405974202,
	"grad_norm": 0.20349960029125214,
	"learning_rate": 0.00016,
	"loss": 1.0102,
	"step": 80
	},
	{
	"epoch": 0.48879837067209775,
	"grad_norm": 0.23872722685337067,
	"learning_rate": 0.00018,
	"loss": 0.9941,
	"step": 90
	},
	{
	"epoch": 0.5431093007467753,
	"grad_norm": 0.20515283942222595,
	"learning_rate": 0.0002,
	"loss": 0.9608,
	"step": 100
	},
	{
	"epoch": 0.5974202308214528,
	"grad_norm": 0.22473494708538055,
	"learning_rate": 0.00019931371771625544,
	"loss": 0.9308,
	"step": 110
	},
	{
	"epoch": 0.6517311608961304,
	"grad_norm": 0.24142144620418549,
	"learning_rate": 0.0001972642905324813,
	"loss": 0.9381,
	"step": 120
	},
	{
	"epoch": 0.7060420909708078,
	"grad_norm": 0.260145902633667,
	"learning_rate": 0.00019387984816003867,
	"loss": 0.8955,
	"step": 130
	},
	{
	"epoch": 0.7603530210454854,
	"grad_norm": 0.22235779464244843,
	"learning_rate": 0.00018920684425573865,
	"loss": 0.8667,
	"step": 140
	},
	{
	"epoch": 0.814663951120163,
	"grad_norm": 0.23329713940620422,
	"learning_rate": 0.00018330941881540915,
	"loss": 0.873,
	"step": 150
	},
	{
	"epoch": 0.8689748811948405,
	"grad_norm": 0.2553715109825134,
	"learning_rate": 0.0001762685178110382,
	"loss": 0.8651,
	"step": 160
	},
	{
	"epoch": 0.923285811269518,
	"grad_norm": 0.22536128759384155,
	"learning_rate": 0.0001681807821550438,
	"loss": 0.8504,
	"step": 170
	},
	{
	"epoch": 0.9775967413441955,
	"grad_norm": 0.22558774054050446,
	"learning_rate": 0.00015915722124135227,
	"loss": 0.8414,
	"step": 180
	},
	{
	"epoch": 1.031907671418873,
	"grad_norm": 0.2400912493467331,
	"learning_rate": 0.00014932168926979074,
	"loss": 0.8389,
	"step": 190
	},
	{
	"epoch": 1.0862186014935507,
	"grad_norm": 0.23116886615753174,
	"learning_rate": 0.00013880918526722497,
	"loss": 0.8289,
	"step": 200
	},
	{
	"epoch": 1.140529531568228,
	"grad_norm": 0.2643767297267914,
	"learning_rate": 0.00012776400013875006,
	"loss": 0.8037,
	"step": 210
	},
	{
	"epoch": 1.1948404616429056,
	"grad_norm": 0.23654422163963318,
	"learning_rate": 0.00011633773618185302,
	"loss": 0.8209,
	"step": 220
	},
	{
	"epoch": 1.2491513917175832,
	"grad_norm": 0.25414180755615234,
	"learning_rate": 0.00010468722624699401,
	"loss": 0.8327,
	"step": 230
	},
	{
	"epoch": 1.3034623217922607,
	"grad_norm": 0.24420738220214844,
	"learning_rate": 9.297238110547074e-05,
	"loss": 0.8056,
	"step": 240
	},
	{
	"epoch": 1.3577732518669383,
	"grad_norm": 0.23167012631893158,
	"learning_rate": 8.13539945708319e-05,
	"loss": 0.8294,
	"step": 250
	},
	{
	"epoch": 1.4120841819416157,
	"grad_norm": 0.2467813491821289,
	"learning_rate": 6.999153649996595e-05,
	"loss": 0.7809,
	"step": 260
	},
	{
	"epoch": 1.4663951120162932,
	"grad_norm": 0.2599235475063324,
	"learning_rate": 5.904096396634935e-05,
	"loss": 0.7995,
	"step": 270
	},
	{
	"epoch": 1.5207060420909708,
	"grad_norm": 0.2679561674594879,
	"learning_rate": 4.865258064851579e-05,
	"loss": 0.7845,
	"step": 280
	},
	{
	"epoch": 1.5750169721656482,
	"grad_norm": 0.2540304958820343,
	"learning_rate": 3.8968973815020806e-05,
	"loss": 0.7868,
	"step": 290
	},
	{
	"epoch": 1.629327902240326,
	"grad_norm": 0.26017382740974426,
	"learning_rate": 3.0123057222115836e-05,
	"loss": 0.7665,
	"step": 300
	},
	{
	"epoch": 1.6836388323150033,
	"grad_norm": 0.2591923177242279,
	"learning_rate": 2.2236246786624792e-05,
	"loss": 0.7936,
	"step": 310
	},
	{
	"epoch": 1.737949762389681,
	"grad_norm": 0.2649495601654053,
	"learning_rate": 1.5416794074090258e-05,
	"loss": 0.7597,
	"step": 320
	},
	{
	"epoch": 1.7922606924643585,
	"grad_norm": 0.25946423411369324,
	"learning_rate": 9.75830047614117e-06,
	"loss": 0.7954,
	"step": 330
	},
	{
	"epoch": 1.8465716225390358,
	"grad_norm": 0.24905863404273987,
	"learning_rate": 5.338432470956589e-06,
	"loss": 0.7648,
	"step": 340
	},
	{
	"epoch": 1.9008825526137136,
	"grad_norm": 0.2536468803882599,
	"learning_rate": 2.2178556007054872e-06,
	"loss": 0.8026,
	"step": 350
	},
	{
	"epoch": 1.955193482688391,
	"grad_norm": 0.27082565426826477,
	"learning_rate": 4.3940179781019055e-07,
	"loss": 0.7768,
	"step": 360
	}
	],
	"logging_steps": 10,
	"max_steps": 368,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 2,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.122148310240461e+17,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}