16-clusters-balanced-10 / trainer_state.json

MHGanainy/16-clusters-balanced-10

21f3454 verified over 1 year ago

7.95 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 4029,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.02482005460412013,
	"grad_norm": 0.1369238644838333,
	"learning_rate": 6.666666666666667e-06,
	"loss": 2.4042,
	"step": 100
	},
	{
	"epoch": 0.04964010920824026,
	"grad_norm": 0.17341101169586182,
	"learning_rate": 1.3333333333333333e-05,
	"loss": 2.3581,
	"step": 200
	},
	{
	"epoch": 0.07446016381236038,
	"grad_norm": 0.30298689007759094,
	"learning_rate": 2e-05,
	"loss": 2.2979,
	"step": 300
	},
	{
	"epoch": 0.09928021841648052,
	"grad_norm": 0.4181392788887024,
	"learning_rate": 1.9964532702725803e-05,
	"loss": 2.2736,
	"step": 400
	},
	{
	"epoch": 0.12410027302060064,
	"grad_norm": 0.4833754301071167,
	"learning_rate": 1.9858382396738395e-05,
	"loss": 2.2352,
	"step": 500
	},
	{
	"epoch": 0.14892032762472077,
	"grad_norm": 0.5508949756622314,
	"learning_rate": 1.9682302054929414e-05,
	"loss": 2.1951,
	"step": 600
	},
	{
	"epoch": 0.17374038222884092,
	"grad_norm": 0.5856565833091736,
	"learning_rate": 1.943754069606428e-05,
	"loss": 2.1662,
	"step": 700
	},
	{
	"epoch": 0.19856043683296104,
	"grad_norm": 0.5611233115196228,
	"learning_rate": 1.9125834524918215e-05,
	"loss": 2.1815,
	"step": 800
	},
	{
	"epoch": 0.22338049143708116,
	"grad_norm": 0.6802138090133667,
	"learning_rate": 1.8749394616578068e-05,
	"loss": 2.1675,
	"step": 900
	},
	{
	"epoch": 0.2482005460412013,
	"grad_norm": 0.6513592004776001,
	"learning_rate": 1.8310891232270827e-05,
	"loss": 2.1402,
	"step": 1000
	},
	{
	"epoch": 0.2730206006453214,
	"grad_norm": 0.6889598369598389,
	"learning_rate": 1.781343487797389e-05,
	"loss": 2.1334,
	"step": 1100
	},
	{
	"epoch": 0.29784065524944153,
	"grad_norm": 0.7928256988525391,
	"learning_rate": 1.7260554240167017e-05,
	"loss": 2.1295,
	"step": 1200
	},
	{
	"epoch": 0.32266070985356166,
	"grad_norm": 0.7162489295005798,
	"learning_rate": 1.665617115523785e-05,
	"loss": 2.1232,
	"step": 1300
	},
	{
	"epoch": 0.34748076445768183,
	"grad_norm": 0.7136086225509644,
	"learning_rate": 1.6004572790094535e-05,
	"loss": 2.1148,
	"step": 1400
	},
	{
	"epoch": 0.37230081906180196,
	"grad_norm": 0.7688263654708862,
	"learning_rate": 1.531038123132105e-05,
	"loss": 2.0873,
	"step": 1500
	},
	{
	"epoch": 0.3971208736659221,
	"grad_norm": 0.772521436214447,
	"learning_rate": 1.4578520698593441e-05,
	"loss": 2.117,
	"step": 1600
	},
	{
	"epoch": 0.4219409282700422,
	"grad_norm": 1.010330080986023,
	"learning_rate": 1.3814182614927217e-05,
	"loss": 2.071,
	"step": 1700
	},
	{
	"epoch": 0.4467609828741623,
	"grad_norm": 0.6752054691314697,
	"learning_rate": 1.3022788781528653e-05,
	"loss": 2.0636,
	"step": 1800
	},
	{
	"epoch": 0.47158103747828245,
	"grad_norm": 0.841232180595398,
	"learning_rate": 1.220995291846777e-05,
	"loss": 2.0532,
	"step": 1900
	},
	{
	"epoch": 0.4964010920824026,
	"grad_norm": 0.7984778881072998,
	"learning_rate": 1.1381440843982634e-05,
	"loss": 2.0438,
	"step": 2000
	},
	{
	"epoch": 0.5212211466865228,
	"grad_norm": 0.8068585395812988,
	"learning_rate": 1.0543129574881446e-05,
	"loss": 2.0687,
	"step": 2100
	},
	{
	"epoch": 0.5460412012906428,
	"grad_norm": 0.8497598767280579,
	"learning_rate": 9.700965638162112e-06,
	"loss": 2.0477,
	"step": 2200
	},
	{
	"epoch": 0.570861255894763,
	"grad_norm": 0.7474705576896667,
	"learning_rate": 8.860922889564078e-06,
	"loss": 2.0429,
	"step": 2300
	},
	{
	"epoch": 0.5956813104988831,
	"grad_norm": 1.0781651735305786,
	"learning_rate": 8.028960138264857e-06,
	"loss": 2.0389,
	"step": 2400
	},
	{
	"epoch": 0.6205013651030032,
	"grad_norm": 0.8750322461128235,
	"learning_rate": 7.21097887830873e-06,
	"loss": 2.046,
	"step": 2500
	},
	{
	"epoch": 0.6453214197071233,
	"grad_norm": 0.9259145855903625,
	"learning_rate": 6.4127814265980095e-06,
	"loss": 2.0243,
	"step": 2600
	},
	{
	"epoch": 0.6701414743112435,
	"grad_norm": 1.1625196933746338,
	"learning_rate": 5.640029764393366e-06,
	"loss": 2.0513,
	"step": 2700
	},
	{
	"epoch": 0.6949615289153637,
	"grad_norm": 0.8271129727363586,
	"learning_rate": 4.8982053742793025e-06,
	"loss": 2.0228,
	"step": 2800
	},
	{
	"epoch": 0.7197815835194837,
	"grad_norm": 0.7196031212806702,
	"learning_rate": 4.1925703574897115e-06,
	"loss": 2.0496,
	"step": 2900
	},
	{
	"epoch": 0.7446016381236039,
	"grad_norm": 0.7880265712738037,
	"learning_rate": 3.528130107406099e-06,
	"loss": 2.0145,
	"step": 3000
	},
	{
	"epoch": 0.769421692727724,
	"grad_norm": 0.909106433391571,
	"learning_rate": 2.909597804002603e-06,
	"loss": 2.0437,
	"step": 3100
	},
	{
	"epoch": 0.7942417473318442,
	"grad_norm": 1.2606161832809448,
	"learning_rate": 2.341360981094921e-06,
	"loss": 2.0443,
	"step": 3200
	},
	{
	"epoch": 0.8190618019359642,
	"grad_norm": 0.795652449131012,
	"learning_rate": 1.8274504035470942e-06,
	"loss": 2.0568,
	"step": 3300
	},
	{
	"epoch": 0.8438818565400844,
	"grad_norm": 0.8904260993003845,
	"learning_rate": 1.3715114752043746e-06,
	"loss": 2.0787,
	"step": 3400
	},
	{
	"epoch": 0.8687019111442045,
	"grad_norm": 1.0925287008285522,
	"learning_rate": 9.767783803688414e-07,
	"loss": 2.045,
	"step": 3500
	},
	{
	"epoch": 0.8935219657483247,
	"grad_norm": 0.799608588218689,
	"learning_rate": 6.460511422441984e-07,
	"loss": 2.0167,
	"step": 3600
	},
	{
	"epoch": 0.9183420203524447,
	"grad_norm": 0.9094216227531433,
	"learning_rate": 3.8167576108468994e-07,
	"loss": 2.057,
	"step": 3700
	},
	{
	"epoch": 0.9431620749565649,
	"grad_norm": 0.8395094871520996,
	"learning_rate": 1.855275729374284e-07,
	"loss": 2.0425,
	"step": 3800
	},
	{
	"epoch": 0.9679821295606851,
	"grad_norm": 0.8606423735618591,
	"learning_rate": 5.89979470221802e-08,
	"loss": 2.0208,
	"step": 3900
	},
	{
	"epoch": 0.9928021841648051,
	"grad_norm": 0.8908767700195312,
	"learning_rate": 2.9844161102077218e-09,
	"loss": 2.0512,
	"step": 4000
	},
	{
	"epoch": 1.0,
	"step": 4029,
	"total_flos": 7.32108351012864e+16,
	"train_loss": 2.105005581936963,
	"train_runtime": 1251.4031,
	"train_samples_per_second": 6.438,
	"train_steps_per_second": 3.22
	}
	],
	"logging_steps": 100,
	"max_steps": 4029,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 7.32108351012864e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}