Upload folder using huggingface_hub

c7622c1 verified over 1 year ago

8.39 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.14629240193837434,
	"eval_steps": 500,
	"global_step": 50,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0,
	"grad_norm": 18.54335018044506,
	"learning_rate": 9.090909090909092e-05,
	"loss": 6.4244,
	"step": 1
	},
	{
	"epoch": 0.01,
	"grad_norm": 18.204878497494867,
	"learning_rate": 0.00018181818181818183,
	"loss": 6.4841,
	"step": 2
	},
	{
	"epoch": 0.01,
	"grad_norm": 7.638311117651696,
	"learning_rate": 0.00027272727272727274,
	"loss": 4.3156,
	"step": 3
	},
	{
	"epoch": 0.01,
	"grad_norm": 3.236258184290763,
	"learning_rate": 0.00036363636363636367,
	"loss": 3.8398,
	"step": 4
	},
	{
	"epoch": 0.01,
	"grad_norm": 1.4816791146245016,
	"learning_rate": 0.00045454545454545455,
	"loss": 3.4542,
	"step": 5
	},
	{
	"epoch": 0.02,
	"grad_norm": 2.4160385130751787,
	"learning_rate": 0.0005454545454545455,
	"loss": 3.0333,
	"step": 6
	},
	{
	"epoch": 0.02,
	"grad_norm": 2.3449118849582677,
	"learning_rate": 0.0006363636363636364,
	"loss": 2.847,
	"step": 7
	},
	{
	"epoch": 0.02,
	"grad_norm": 0.782966544742558,
	"learning_rate": 0.0007272727272727273,
	"loss": 2.4424,
	"step": 8
	},
	{
	"epoch": 0.03,
	"grad_norm": 0.4286720843729141,
	"learning_rate": 0.0008181818181818183,
	"loss": 2.2581,
	"step": 9
	},
	{
	"epoch": 0.03,
	"grad_norm": 0.19557058746571676,
	"learning_rate": 0.0009090909090909091,
	"loss": 2.1925,
	"step": 10
	},
	{
	"epoch": 0.03,
	"grad_norm": 0.19629394764775782,
	"learning_rate": 0.001,
	"loss": 2.15,
	"step": 11
	},
	{
	"epoch": 0.04,
	"grad_norm": 0.24283548114329223,
	"learning_rate": 0.0009999773426770863,
	"loss": 2.11,
	"step": 12
	},
	{
	"epoch": 0.04,
	"grad_norm": 0.1602137829534043,
	"learning_rate": 0.000999909372761763,
	"loss": 2.1018,
	"step": 13
	},
	{
	"epoch": 0.04,
	"grad_norm": 0.9265558670447405,
	"learning_rate": 0.0009997960964140947,
	"loss": 2.156,
	"step": 14
	},
	{
	"epoch": 0.04,
	"grad_norm": 0.15041539304459745,
	"learning_rate": 0.0009996375239002368,
	"loss": 2.1039,
	"step": 15
	},
	{
	"epoch": 0.05,
	"grad_norm": 0.08444195746666726,
	"learning_rate": 0.000999433669591504,
	"loss": 2.0826,
	"step": 16
	},
	{
	"epoch": 0.05,
	"grad_norm": 0.05336799505560437,
	"learning_rate": 0.0009991845519630679,
	"loss": 2.0558,
	"step": 17
	},
	{
	"epoch": 0.05,
	"grad_norm": 0.04503459780125264,
	"learning_rate": 0.0009988901935922825,
	"loss": 2.0479,
	"step": 18
	},
	{
	"epoch": 0.06,
	"grad_norm": 0.03827244634244726,
	"learning_rate": 0.0009985506211566387,
	"loss": 2.0445,
	"step": 19
	},
	{
	"epoch": 0.06,
	"grad_norm": 0.040159263058230144,
	"learning_rate": 0.0009981658654313456,
	"loss": 2.0618,
	"step": 20
	},
	{
	"epoch": 0.06,
	"grad_norm": 0.04362572422821177,
	"learning_rate": 0.0009977359612865424,
	"loss": 2.04,
	"step": 21
	},
	{
	"epoch": 0.06,
	"grad_norm": 0.04512109012272362,
	"learning_rate": 0.0009972609476841367,
	"loss": 2.035,
	"step": 22
	},
	{
	"epoch": 0.07,
	"grad_norm": 0.0460336532256244,
	"learning_rate": 0.0009967408676742752,
	"loss": 2.0438,
	"step": 23
	},
	{
	"epoch": 0.07,
	"grad_norm": 0.04161179529868031,
	"learning_rate": 0.0009961757683914405,
	"loss": 2.0075,
	"step": 24
	},
	{
	"epoch": 0.07,
	"grad_norm": 0.03913485984103564,
	"learning_rate": 0.0009955657010501807,
	"loss": 1.9991,
	"step": 25
	},
	{
	"epoch": 0.08,
	"grad_norm": 0.036980743783326483,
	"learning_rate": 0.0009949107209404665,
	"loss": 1.9939,
	"step": 26
	},
	{
	"epoch": 0.08,
	"grad_norm": 0.03318251717420227,
	"learning_rate": 0.0009942108874226813,
	"loss": 1.9927,
	"step": 27
	},
	{
	"epoch": 0.08,
	"grad_norm": 0.0331961384143108,
	"learning_rate": 0.0009934662639222412,
	"loss": 1.9806,
	"step": 28
	},
	{
	"epoch": 0.08,
	"grad_norm": 0.03562839770417466,
	"learning_rate": 0.0009926769179238466,
	"loss": 1.9833,
	"step": 29
	},
	{
	"epoch": 0.09,
	"grad_norm": 0.05177166624815994,
	"learning_rate": 0.0009918429209653662,
	"loss": 1.987,
	"step": 30
	},
	{
	"epoch": 0.09,
	"grad_norm": 0.04047010737996018,
	"learning_rate": 0.0009909643486313534,
	"loss": 1.9683,
	"step": 31
	},
	{
	"epoch": 0.09,
	"grad_norm": 0.04112376138396604,
	"learning_rate": 0.0009900412805461966,
	"loss": 1.9509,
	"step": 32
	},
	{
	"epoch": 0.1,
	"grad_norm": 0.045127786908756885,
	"learning_rate": 0.0009890738003669028,
	"loss": 1.934,
	"step": 33
	},
	{
	"epoch": 0.1,
	"grad_norm": 0.03348389008468475,
	"learning_rate": 0.000988061995775515,
	"loss": 1.9435,
	"step": 34
	},
	{
	"epoch": 0.1,
	"grad_norm": 0.0369790335444681,
	"learning_rate": 0.0009870059584711668,
	"loss": 1.9304,
	"step": 35
	},
	{
	"epoch": 0.11,
	"grad_norm": 0.03732257647443908,
	"learning_rate": 0.000985905784161771,
	"loss": 1.9318,
	"step": 36
	},
	{
	"epoch": 0.11,
	"grad_norm": 0.034825381944822265,
	"learning_rate": 0.0009847615725553456,
	"loss": 1.9093,
	"step": 37
	},
	{
	"epoch": 0.11,
	"grad_norm": 0.03380767656809614,
	"learning_rate": 0.0009835734273509786,
	"loss": 1.9031,
	"step": 38
	},
	{
	"epoch": 0.11,
	"grad_norm": 0.03270535048583525,
	"learning_rate": 0.000982341456229428,
	"loss": 1.8901,
	"step": 39
	},
	{
	"epoch": 0.12,
	"grad_norm": 0.02874837099611008,
	"learning_rate": 0.0009810657708433637,
	"loss": 1.9126,
	"step": 40
	},
	{
	"epoch": 0.12,
	"grad_norm": 0.02842928929645405,
	"learning_rate": 0.0009797464868072487,
	"loss": 1.8894,
	"step": 41
	},
	{
	"epoch": 0.12,
	"grad_norm": 0.02584963545732469,
	"learning_rate": 0.0009783837236868609,
	"loss": 1.8641,
	"step": 42
	},
	{
	"epoch": 0.13,
	"grad_norm": 0.028295949533072753,
	"learning_rate": 0.0009769776049884564,
	"loss": 1.8857,
	"step": 43
	},
	{
	"epoch": 0.13,
	"grad_norm": 0.030410135452704438,
	"learning_rate": 0.0009755282581475768,
	"loss": 1.8934,
	"step": 44
	},
	{
	"epoch": 0.13,
	"grad_norm": 0.02966185510977657,
	"learning_rate": 0.0009740358145174998,
	"loss": 1.8535,
	"step": 45
	},
	{
	"epoch": 0.13,
	"grad_norm": 0.030063842481471717,
	"learning_rate": 0.0009725004093573342,
	"loss": 1.8481,
	"step": 46
	},
	{
	"epoch": 0.14,
	"grad_norm": 0.02556650721323588,
	"learning_rate": 0.0009709221818197624,
	"loss": 1.8638,
	"step": 47
	},
	{
	"epoch": 0.14,
	"grad_norm": 0.026845238542832082,
	"learning_rate": 0.0009693012749384279,
	"loss": 1.8409,
	"step": 48
	},
	{
	"epoch": 0.14,
	"grad_norm": 0.026180343066426557,
	"learning_rate": 0.0009676378356149733,
	"loss": 1.8536,
	"step": 49
	},
	{
	"epoch": 0.15,
	"grad_norm": 0.026565196661024762,
	"learning_rate": 0.0009659320146057262,
	"loss": 1.8584,
	"step": 50
	}
	],
	"logging_steps": 1.0,
	"max_steps": 341,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 25,
	"total_flos": 1.0991121077706424e+18,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}