Upload folder using huggingface_hub

3c5e213 verified over 1 year ago

7.5 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9998005186515061,
	"eval_steps": 10000,
	"global_step": 3759,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.026597513132522108,
	"grad_norm": 0.14929383993148804,
	"learning_rate": 2.6595744680851064e-05,
	"loss": 0.7258,
	"step": 100
	},
	{
	"epoch": 0.053195026265044215,
	"grad_norm": 0.22324731945991516,
	"learning_rate": 5.319148936170213e-05,
	"loss": 0.477,
	"step": 200
	},
	{
	"epoch": 0.07979253939756632,
	"grad_norm": 0.1394360512495041,
	"learning_rate": 7.978723404255319e-05,
	"loss": 0.4484,
	"step": 300
	},
	{
	"epoch": 0.10639005253008843,
	"grad_norm": 0.1933247148990631,
	"learning_rate": 9.99875823256999e-05,
	"loss": 0.4301,
	"step": 400
	},
	{
	"epoch": 0.13298756566261055,
	"grad_norm": 0.1871040314435959,
	"learning_rate": 9.966886949974127e-05,
	"loss": 0.4265,
	"step": 500
	},
	{
	"epoch": 0.15958507879513265,
	"grad_norm": 0.12056238204240799,
	"learning_rate": 9.892213289286789e-05,
	"loss": 0.4169,
	"step": 600
	},
	{
	"epoch": 0.18618259192765477,
	"grad_norm": 0.23017819225788116,
	"learning_rate": 9.775380754233831e-05,
	"loss": 0.4149,
	"step": 700
	},
	{
	"epoch": 0.21278010506017686,
	"grad_norm": 0.22511781752109528,
	"learning_rate": 9.617396154591494e-05,
	"loss": 0.4127,
	"step": 800
	},
	{
	"epoch": 0.23937761819269898,
	"grad_norm": 0.15255825221538544,
	"learning_rate": 9.41962092995658e-05,
	"loss": 0.4059,
	"step": 900
	},
	{
	"epoch": 0.2659751313252211,
	"grad_norm": 0.1079307496547699,
	"learning_rate": 9.183759417477731e-05,
	"loss": 0.4018,
	"step": 1000
	},
	{
	"epoch": 0.2925726444577432,
	"grad_norm": 0.1743098944425583,
	"learning_rate": 8.9118441646512e-05,
	"loss": 0.3919,
	"step": 1100
	},
	{
	"epoch": 0.3191701575902653,
	"grad_norm": 0.19822756946086884,
	"learning_rate": 8.606218413748768e-05,
	"loss": 0.3909,
	"step": 1200
	},
	{
	"epoch": 0.3457676707227874,
	"grad_norm": 0.1920367330312729,
	"learning_rate": 8.26951590881904e-05,
	"loss": 0.3855,
	"step": 1300
	},
	{
	"epoch": 0.37236518385530953,
	"grad_norm": 0.1861521601676941,
	"learning_rate": 7.904638199276271e-05,
	"loss": 0.3907,
	"step": 1400
	},
	{
	"epoch": 0.39896269698783166,
	"grad_norm": 0.10632487386465073,
	"learning_rate": 7.514729635664032e-05,
	"loss": 0.3846,
	"step": 1500
	},
	{
	"epoch": 0.4255602101203537,
	"grad_norm": 0.2400396317243576,
	"learning_rate": 7.103150273068921e-05,
	"loss": 0.3904,
	"step": 1600
	},
	{
	"epoch": 0.45215772325287584,
	"grad_norm": 0.1306409388780594,
	"learning_rate": 6.673446915690408e-05,
	"loss": 0.3912,
	"step": 1700
	},
	{
	"epoch": 0.47875523638539796,
	"grad_norm": 0.16852012276649475,
	"learning_rate": 6.229322552091536e-05,
	"loss": 0.3806,
	"step": 1800
	},
	{
	"epoch": 0.5053527495179201,
	"grad_norm": 0.23730169236660004,
	"learning_rate": 5.774604444523663e-05,
	"loss": 0.3812,
	"step": 1900
	},
	{
	"epoch": 0.5319502626504422,
	"grad_norm": 0.17537447810173035,
	"learning_rate": 5.313211147316933e-05,
	"loss": 0.3767,
	"step": 2000
	},
	{
	"epoch": 0.5585477757829643,
	"grad_norm": 0.13570892810821533,
	"learning_rate": 4.849118738557042e-05,
	"loss": 0.3782,
	"step": 2100
	},
	{
	"epoch": 0.5851452889154865,
	"grad_norm": 0.21518413722515106,
	"learning_rate": 4.386326556048369e-05,
	"loss": 0.3706,
	"step": 2200
	},
	{
	"epoch": 0.6117428020480085,
	"grad_norm": 0.28089213371276855,
	"learning_rate": 3.9288227328354234e-05,
	"loss": 0.3805,
	"step": 2300
	},
	{
	"epoch": 0.6383403151805306,
	"grad_norm": 0.19185791909694672,
	"learning_rate": 3.4805498292818055e-05,
	"loss": 0.3683,
	"step": 2400
	},
	{
	"epoch": 0.6649378283130527,
	"grad_norm": 0.2617637515068054,
	"learning_rate": 3.045370857873868e-05,
	"loss": 0.3825,
	"step": 2500
	},
	{
	"epoch": 0.6915353414455748,
	"grad_norm": 0.11534757167100906,
	"learning_rate": 2.6270359935318967e-05,
	"loss": 0.3721,
	"step": 2600
	},
	{
	"epoch": 0.718132854578097,
	"grad_norm": 0.18955928087234497,
	"learning_rate": 2.22915025630421e-05,
	"loss": 0.3682,
	"step": 2700
	},
	{
	"epoch": 0.7447303677106191,
	"grad_norm": 0.22698479890823364,
	"learning_rate": 1.8551424449401173e-05,
	"loss": 0.3675,
	"step": 2800
	},
	{
	"epoch": 0.7713278808431412,
	"grad_norm": 0.18734973669052124,
	"learning_rate": 1.5082355890580507e-05,
	"loss": 0.3719,
	"step": 2900
	},
	{
	"epoch": 0.7979253939756633,
	"grad_norm": 0.22010599076747894,
	"learning_rate": 1.1914191745387143e-05,
	"loss": 0.363,
	"step": 3000
	},
	{
	"epoch": 0.8245229071081854,
	"grad_norm": 0.25703734159469604,
	"learning_rate": 9.074233814921846e-06,
	"loss": 0.3711,
	"step": 3100
	},
	{
	"epoch": 0.8511204202407074,
	"grad_norm": 0.15355700254440308,
	"learning_rate": 6.586955568045134e-06,
	"loss": 0.362,
	"step": 3200
	},
	{
	"epoch": 0.8777179333732296,
	"grad_norm": 0.20059217512607574,
	"learning_rate": 4.47379124012689e-06,
	"loss": 0.3422,
	"step": 3300
	},
	{
	"epoch": 0.9043154465057517,
	"grad_norm": 0.16272881627082825,
	"learning_rate": 2.7529511225315162e-06,
	"loss": 0.3667,
	"step": 3400
	},
	{
	"epoch": 0.9309129596382738,
	"grad_norm": 0.24089215695858002,
	"learning_rate": 1.4392646345894934e-06,
	"loss": 0.3883,
	"step": 3500
	},
	{
	"epoch": 0.9575104727707959,
	"grad_norm": 0.14056609570980072,
	"learning_rate": 5.440525303902377e-07,
	"loss": 0.3723,
	"step": 3600
	},
	{
	"epoch": 0.984107985903318,
	"grad_norm": 0.18514706194400787,
	"learning_rate": 7.502934165993791e-08,
	"loss": 0.366,
	"step": 3700
	},
	{
	"epoch": 0.9998005186515061,
	"step": 3759,
	"total_flos": 1.8198794606166934e+19,
	"train_loss": 0.39680684224885016,
	"train_runtime": 142849.6117,
	"train_samples_per_second": 0.421,
	"train_steps_per_second": 0.026
	}
	],
	"logging_steps": 100,
	"max_steps": 3759,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 2500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.8198794606166934e+19,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}