{
"best_metric": 1.7200040817260742,
"best_model_checkpoint": "./results/cluster2_batch1_prop0.2/checkpoint-1500",
"epoch": 0.9999231616422918,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04098045744435622,
"grad_norm": 0.3399759829044342,
"learning_rate": 9.997020702755353e-05,
"loss": 1.9745,
"step": 100
},
{
"epoch": 0.08196091488871245,
"grad_norm": 1.2304991483688354,
"learning_rate": 9.930186708264901e-05,
"loss": 1.7722,
"step": 200
},
{
"epoch": 0.12294137233306866,
"grad_norm": 1.1636056900024414,
"learning_rate": 9.776557563346957e-05,
"loss": 1.7359,
"step": 300
},
{
"epoch": 0.1639218297774249,
"grad_norm": 1.0206753015518188,
"learning_rate": 9.538837884587511e-05,
"loss": 1.6943,
"step": 400
},
{
"epoch": 0.2049022872217811,
"grad_norm": 1.0695409774780273,
"learning_rate": 9.221212689004862e-05,
"loss": 1.6755,
"step": 500
},
{
"epoch": 0.2049022872217811,
"eval_loss": 1.7410061359405518,
"eval_runtime": 1240.4448,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 2.02,
"step": 500
},
{
"epoch": 0.24588274466613733,
"grad_norm": 1.3881298303604126,
"learning_rate": 8.82927371749271e-05,
"loss": 1.6707,
"step": 600
},
{
"epoch": 0.28686320211049354,
"grad_norm": 0.9833546876907349,
"learning_rate": 8.369920993113824e-05,
"loss": 1.6787,
"step": 700
},
{
"epoch": 0.3278436595548498,
"grad_norm": 0.9632484912872314,
"learning_rate": 7.851241347294876e-05,
"loss": 1.683,
"step": 800
},
{
"epoch": 0.36882411699920603,
"grad_norm": 0.9162977337837219,
"learning_rate": 7.28236605244935e-05,
"loss": 1.6609,
"step": 900
},
{
"epoch": 0.4098045744435622,
"grad_norm": 1.2075759172439575,
"learning_rate": 6.673310067383545e-05,
"loss": 1.6527,
"step": 1000
},
{
"epoch": 0.4098045744435622,
"eval_loss": 1.7281365394592285,
"eval_runtime": 1238.7549,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 2.023,
"step": 1000
},
{
"epoch": 0.45078503188791846,
"grad_norm": 1.0344264507293701,
"learning_rate": 6.034795725544571e-05,
"loss": 1.6614,
"step": 1100
},
{
"epoch": 0.49176548933227465,
"grad_norm": 1.024488925933838,
"learning_rate": 5.378063970050694e-05,
"loss": 1.6817,
"step": 1200
},
{
"epoch": 0.5327459467766309,
"grad_norm": 1.174185037612915,
"learning_rate": 4.7146764586811296e-05,
"loss": 1.6607,
"step": 1300
},
{
"epoch": 0.5737264042209871,
"grad_norm": 1.417396903038025,
"learning_rate": 4.056312022735417e-05,
"loss": 1.6499,
"step": 1400
},
{
"epoch": 0.6147068616653434,
"grad_norm": 0.985633909702301,
"learning_rate": 3.414561063071644e-05,
"loss": 1.6598,
"step": 1500
},
{
"epoch": 0.6147068616653434,
"eval_loss": 1.7200040817260742,
"eval_runtime": 1239.5137,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 2.022,
"step": 1500
},
{
"epoch": 0.6556873191096996,
"grad_norm": 1.1905289888381958,
"learning_rate": 2.8007215029485057e-05,
"loss": 1.6755,
"step": 1600
},
{
"epoch": 0.6966677765540558,
"grad_norm": 1.6823028326034546,
"learning_rate": 2.2255998898888165e-05,
"loss": 1.6654,
"step": 1700
},
{
"epoch": 0.7376482339984121,
"grad_norm": 1.2739040851593018,
"learning_rate": 1.6993211481344824e-05,
"loss": 1.6453,
"step": 1800
},
{
"epoch": 0.7786286914427683,
"grad_norm": 1.4436434507369995,
"learning_rate": 1.2311503309705629e-05,
"loss": 1.6359,
"step": 1900
},
{
"epoch": 0.8196091488871244,
"grad_norm": 1.336064100265503,
"learning_rate": 8.293295109403504e-06,
"loss": 1.665,
"step": 2000
},
{
"epoch": 0.8196091488871244,
"eval_loss": 1.7243653535842896,
"eval_runtime": 1238.6702,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 2.023,
"step": 2000
},
{
"epoch": 0.8605896063314806,
"grad_norm": 0.9651763439178467,
"learning_rate": 5.009326794732072e-06,
"loss": 1.6241,
"step": 2100
},
{
"epoch": 0.9015700637758369,
"grad_norm": 1.135158896446228,
"learning_rate": 2.5174121039404643e-06,
"loss": 1.6527,
"step": 2200
},
{
"epoch": 0.9425505212201931,
"grad_norm": 1.232653260231018,
"learning_rate": 8.614207975952082e-07,
"loss": 1.6381,
"step": 2300
},
{
"epoch": 0.9835309786645493,
"grad_norm": 0.941377580165863,
"learning_rate": 7.050633844443711e-08,
"loss": 1.6307,
"step": 2400
}
],
"logging_steps": 100,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.1491335851408384e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}