Upload folder using huggingface_hub

2a65b40 verified 9 months ago

7.28 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9967335510965936,
	"eval_steps": 500,
	"global_step": 178,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0055996266915538965,
	"grad_norm": 0.9287750653110117,
	"learning_rate": 5.555555555555555e-07,
	"loss": 0.7737,
	"step": 1
	},
	{
	"epoch": 0.027998133457769483,
	"grad_norm": 0.7051671404282305,
	"learning_rate": 2.7777777777777783e-06,
	"loss": 0.7341,
	"step": 5
	},
	{
	"epoch": 0.05599626691553897,
	"grad_norm": 0.2520214877481558,
	"learning_rate": 5.555555555555557e-06,
	"loss": 0.5258,
	"step": 10
	},
	{
	"epoch": 0.08399440037330845,
	"grad_norm": 0.14052119101634014,
	"learning_rate": 8.333333333333334e-06,
	"loss": 0.2894,
	"step": 15
	},
	{
	"epoch": 0.11199253383107793,
	"grad_norm": 0.056427898284759595,
	"learning_rate": 9.996145181203616e-06,
	"loss": 0.1822,
	"step": 20
	},
	{
	"epoch": 0.1399906672888474,
	"grad_norm": 0.03842870313192144,
	"learning_rate": 9.952846702217886e-06,
	"loss": 0.144,
	"step": 25
	},
	{
	"epoch": 0.1679888007466169,
	"grad_norm": 0.03200746314332259,
	"learning_rate": 9.861849601988384e-06,
	"loss": 0.1211,
	"step": 30
	},
	{
	"epoch": 0.19598693420438637,
	"grad_norm": 0.02565149233539642,
	"learning_rate": 9.72403023233439e-06,
	"loss": 0.1101,
	"step": 35
	},
	{
	"epoch": 0.22398506766215587,
	"grad_norm": 0.024613549317984977,
	"learning_rate": 9.540715869125407e-06,
	"loss": 0.097,
	"step": 40
	},
	{
	"epoch": 0.25198320111992534,
	"grad_norm": 0.02144179428017716,
	"learning_rate": 9.31367192988896e-06,
	"loss": 0.0871,
	"step": 45
	},
	{
	"epoch": 0.2799813345776948,
	"grad_norm": 0.023284769150725856,
	"learning_rate": 9.045084971874738e-06,
	"loss": 0.0817,
	"step": 50
	},
	{
	"epoch": 0.3079794680354643,
	"grad_norm": 0.02269598632952473,
	"learning_rate": 8.737541634312985e-06,
	"loss": 0.0748,
	"step": 55
	},
	{
	"epoch": 0.3359776014932338,
	"grad_norm": 0.024675985934094376,
	"learning_rate": 8.39400372766471e-06,
	"loss": 0.0708,
	"step": 60
	},
	{
	"epoch": 0.36397573495100327,
	"grad_norm": 0.02363318129449854,
	"learning_rate": 8.017779709767857e-06,
	"loss": 0.0632,
	"step": 65
	},
	{
	"epoch": 0.39197386840877274,
	"grad_norm": 0.017744170199618194,
	"learning_rate": 7.612492823579744e-06,
	"loss": 0.0601,
	"step": 70
	},
	{
	"epoch": 0.4199720018665422,
	"grad_norm": 0.02180319562108693,
	"learning_rate": 7.18204620336671e-06,
	"loss": 0.0557,
	"step": 75
	},
	{
	"epoch": 0.44797013532431174,
	"grad_norm": 0.021481125876423392,
	"learning_rate": 6.730585285387465e-06,
	"loss": 0.0612,
	"step": 80
	},
	{
	"epoch": 0.4759682687820812,
	"grad_norm": 0.01901447075970861,
	"learning_rate": 6.26245788507579e-06,
	"loss": 0.0511,
	"step": 85
	},
	{
	"epoch": 0.5039664022398507,
	"grad_norm": 0.019624242818030622,
	"learning_rate": 5.782172325201155e-06,
	"loss": 0.0541,
	"step": 90
	},
	{
	"epoch": 0.5319645356976201,
	"grad_norm": 0.021770565533330784,
	"learning_rate": 5.294354018255945e-06,
	"loss": 0.052,
	"step": 95
	},
	{
	"epoch": 0.5599626691553896,
	"grad_norm": 0.019329162918422096,
	"learning_rate": 4.803700921204659e-06,
	"loss": 0.0494,
	"step": 100
	},
	{
	"epoch": 0.5879608026131591,
	"grad_norm": 0.020279337189906023,
	"learning_rate": 4.314938291590161e-06,
	"loss": 0.0472,
	"step": 105
	},
	{
	"epoch": 0.6159589360709286,
	"grad_norm": 0.020370573885900606,
	"learning_rate": 3.832773180720475e-06,
	"loss": 0.0496,
	"step": 110
	},
	{
	"epoch": 0.6439570695286981,
	"grad_norm": 0.01780890820627811,
	"learning_rate": 3.3618491021915334e-06,
	"loss": 0.0418,
	"step": 115
	},
	{
	"epoch": 0.6719552029864676,
	"grad_norm": 0.01770442385609227,
	"learning_rate": 2.906701312312861e-06,
	"loss": 0.0463,
	"step": 120
	},
	{
	"epoch": 0.6999533364442371,
	"grad_norm": 0.017517610950091075,
	"learning_rate": 2.471713133110078e-06,
	"loss": 0.0427,
	"step": 125
	},
	{
	"epoch": 0.7279514699020065,
	"grad_norm": 0.017896600937746723,
	"learning_rate": 2.061073738537635e-06,
	"loss": 0.0436,
	"step": 130
	},
	{
	"epoch": 0.755949603359776,
	"grad_norm": 0.01501649892368704,
	"learning_rate": 1.6787378104435931e-06,
	"loss": 0.0428,
	"step": 135
	},
	{
	"epoch": 0.7839477368175455,
	"grad_norm": 0.0163607070923351,
	"learning_rate": 1.3283874528215735e-06,
	"loss": 0.047,
	"step": 140
	},
	{
	"epoch": 0.811945870275315,
	"grad_norm": 0.019565825006344753,
	"learning_rate": 1.013396731136465e-06,
	"loss": 0.0441,
	"step": 145
	},
	{
	"epoch": 0.8399440037330844,
	"grad_norm": 0.017999287355466544,
	"learning_rate": 7.367991782295392e-07,
	"loss": 0.0454,
	"step": 150
	},
	{
	"epoch": 0.8679421371908539,
	"grad_norm": 0.020504960252671952,
	"learning_rate": 5.012585797388936e-07,
	"loss": 0.0485,
	"step": 155
	},
	{
	"epoch": 0.8959402706486235,
	"grad_norm": 0.016379587248382697,
	"learning_rate": 3.0904332038757977e-07,
	"loss": 0.0434,
	"step": 160
	},
	{
	"epoch": 0.9239384041063929,
	"grad_norm": 0.0174476169165481,
	"learning_rate": 1.6200453819870122e-07,
	"loss": 0.05,
	"step": 165
	},
	{
	"epoch": 0.9519365375641624,
	"grad_norm": 0.01783292237434629,
	"learning_rate": 6.15582970243117e-08,
	"loss": 0.0452,
	"step": 170
	},
	{
	"epoch": 0.9799346710219319,
	"grad_norm": 0.01908510011245694,
	"learning_rate": 8.671949076420883e-09,
	"loss": 0.0427,
	"step": 175
	},
	{
	"epoch": 0.9967335510965936,
	"step": 178,
	"total_flos": 9.035586631704248e+17,
	"train_loss": 0.10327898384479994,
	"train_runtime": 2928.2846,
	"train_samples_per_second": 2.927,
	"train_steps_per_second": 0.061
	}
	],
	"logging_steps": 5,
	"max_steps": 178,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 9.035586631704248e+17,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}