16-clusters-imbalanced-10 / trainer_state.json

MHGanainy/16-clusters-imbalanced-10

131ef5b verified over 1 year ago

8.65 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 4413,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.022660321776569226,
	"grad_norm": 0.09582193195819855,
	"learning_rate": 6.666666666666667e-06,
	"loss": 2.4053,
	"step": 100
	},
	{
	"epoch": 0.04532064355313845,
	"grad_norm": 0.1730571836233139,
	"learning_rate": 1.3333333333333333e-05,
	"loss": 2.3825,
	"step": 200
	},
	{
	"epoch": 0.06798096532970768,
	"grad_norm": 0.3355884253978729,
	"learning_rate": 2e-05,
	"loss": 2.356,
	"step": 300
	},
	{
	"epoch": 0.0906412871062769,
	"grad_norm": 0.37008875608444214,
	"learning_rate": 1.9970843111690533e-05,
	"loss": 2.2935,
	"step": 400
	},
	{
	"epoch": 0.11330160888284614,
	"grad_norm": 0.5142782330513,
	"learning_rate": 1.9883542471589315e-05,
	"loss": 2.2561,
	"step": 500
	},
	{
	"epoch": 0.13596193065941536,
	"grad_norm": 0.42936116456985474,
	"learning_rate": 1.9738607162698895e-05,
	"loss": 2.2424,
	"step": 600
	},
	{
	"epoch": 0.1586222524359846,
	"grad_norm": 0.537521243095398,
	"learning_rate": 1.9536882357541958e-05,
	"loss": 2.242,
	"step": 700
	},
	{
	"epoch": 0.1812825742125538,
	"grad_norm": 0.602051854133606,
	"learning_rate": 1.927954438964115e-05,
	"loss": 2.214,
	"step": 800
	},
	{
	"epoch": 0.20394289598912305,
	"grad_norm": 0.5285528898239136,
	"learning_rate": 1.8968093893874042e-05,
	"loss": 2.1943,
	"step": 900
	},
	{
	"epoch": 0.22660321776569228,
	"grad_norm": 0.6450159549713135,
	"learning_rate": 1.8604347055704433e-05,
	"loss": 2.1566,
	"step": 1000
	},
	{
	"epoch": 0.2492635395422615,
	"grad_norm": 0.6594407558441162,
	"learning_rate": 1.8190425020319016e-05,
	"loss": 2.1578,
	"step": 1100
	},
	{
	"epoch": 0.27192386131883073,
	"grad_norm": 0.6740846633911133,
	"learning_rate": 1.7728741523428696e-05,
	"loss": 2.1578,
	"step": 1200
	},
	{
	"epoch": 0.29458418309539997,
	"grad_norm": 0.6054636240005493,
	"learning_rate": 1.722198881586411e-05,
	"loss": 2.1301,
	"step": 1300
	},
	{
	"epoch": 0.3172445048719692,
	"grad_norm": 0.5829110145568848,
	"learning_rate": 1.667312196404425e-05,
	"loss": 2.1366,
	"step": 1400
	},
	{
	"epoch": 0.3399048266485384,
	"grad_norm": 0.6636696457862854,
	"learning_rate": 1.6085341617868172e-05,
	"loss": 2.1301,
	"step": 1500
	},
	{
	"epoch": 0.3625651484251076,
	"grad_norm": 0.8352382779121399,
	"learning_rate": 1.546207534651667e-05,
	"loss": 2.1157,
	"step": 1600
	},
	{
	"epoch": 0.38522547020167686,
	"grad_norm": 0.6855395436286926,
	"learning_rate": 1.4806957651001911e-05,
	"loss": 2.1084,
	"step": 1700
	},
	{
	"epoch": 0.4078857919782461,
	"grad_norm": 0.8896074891090393,
	"learning_rate": 1.4123808770019433e-05,
	"loss": 2.1494,
	"step": 1800
	},
	{
	"epoch": 0.43054611375481533,
	"grad_norm": 0.7051901817321777,
	"learning_rate": 1.3416612402693543e-05,
	"loss": 2.1406,
	"step": 1900
	},
	{
	"epoch": 0.45320643553138457,
	"grad_norm": 0.9859122633934021,
	"learning_rate": 1.2689492478123242e-05,
	"loss": 2.1142,
	"step": 2000
	},
	{
	"epoch": 0.47586675730795375,
	"grad_norm": 0.9501364827156067,
	"learning_rate": 1.1946689107194183e-05,
	"loss": 2.091,
	"step": 2100
	},
	{
	"epoch": 0.498527079084523,
	"grad_norm": 1.0703001022338867,
	"learning_rate": 1.119253385689078e-05,
	"loss": 2.0765,
	"step": 2200
	},
	{
	"epoch": 0.5211874008610923,
	"grad_norm": 0.669400691986084,
	"learning_rate": 1.0431424491293254e-05,
	"loss": 2.0824,
	"step": 2300
	},
	{
	"epoch": 0.5438477226376615,
	"grad_norm": 0.7835758924484253,
	"learning_rate": 9.667799326554403e-06,
	"loss": 2.0818,
	"step": 2400
	},
	{
	"epoch": 0.5665080444142306,
	"grad_norm": 0.8207575082778931,
	"learning_rate": 8.906111349401949e-06,
	"loss": 2.1016,
	"step": 2500
	},
	{
	"epoch": 0.5891683661907999,
	"grad_norm": 0.8124341368675232,
	"learning_rate": 8.150802250091193e-06,
	"loss": 2.0647,
	"step": 2600
	},
	{
	"epoch": 0.6118286879673691,
	"grad_norm": 0.8744191527366638,
	"learning_rate": 7.406276521231679e-06,
	"loss": 2.0657,
	"step": 2700
	},
	{
	"epoch": 0.6344890097439384,
	"grad_norm": 1.0869206190109253,
	"learning_rate": 6.676875773527383e-06,
	"loss": 2.0547,
	"step": 2800
	},
	{
	"epoch": 0.6571493315205076,
	"grad_norm": 0.7237268686294556,
	"learning_rate": 5.966853418205035e-06,
	"loss": 2.124,
	"step": 2900
	},
	{
	"epoch": 0.6798096532970768,
	"grad_norm": 0.9836551547050476,
	"learning_rate": 5.2803498637669055e-06,
	"loss": 2.0877,
	"step": 3000
	},
	{
	"epoch": 0.7024699750736461,
	"grad_norm": 0.8831650614738464,
	"learning_rate": 4.621368371705162e-06,
	"loss": 2.0978,
	"step": 3100
	},
	{
	"epoch": 0.7251302968502152,
	"grad_norm": 0.8482229709625244,
	"learning_rate": 3.993751711972204e-06,
	"loss": 2.075,
	"step": 3200
	},
	{
	"epoch": 0.7477906186267845,
	"grad_norm": 0.8325951099395752,
	"learning_rate": 3.401159754337836e-06,
	"loss": 2.1016,
	"step": 3300
	},
	{
	"epoch": 0.7704509404033537,
	"grad_norm": 1.3220783472061157,
	"learning_rate": 2.8470481263064255e-06,
	"loss": 2.1096,
	"step": 3400
	},
	{
	"epoch": 0.793111262179923,
	"grad_norm": 0.8809642195701599,
	"learning_rate": 2.3346480620478685e-06,
	"loss": 2.079,
	"step": 3500
	},
	{
	"epoch": 0.8157715839564922,
	"grad_norm": 0.9344497919082642,
	"learning_rate": 1.866947559850839e-06,
	"loss": 2.1025,
	"step": 3600
	},
	{
	"epoch": 0.8384319057330614,
	"grad_norm": 0.9643566012382507,
	"learning_rate": 1.446673957976298e-06,
	"loss": 2.1116,
	"step": 3700
	},
	{
	"epoch": 0.8610922275096307,
	"grad_norm": 1.0109236240386963,
	"learning_rate": 1.0762780305181064e-06,
	"loss": 2.0662,
	"step": 3800
	},
	{
	"epoch": 0.8837525492861998,
	"grad_norm": 0.9456806182861328,
	"learning_rate": 7.579196960136958e-07,
	"loss": 2.0815,
	"step": 3900
	},
	{
	"epoch": 0.9064128710627691,
	"grad_norm": 1.229778528213501,
	"learning_rate": 4.934554221433741e-07,
	"loss": 2.0636,
	"step": 4000
	},
	{
	"epoch": 0.9290731928393383,
	"grad_norm": 0.8360131978988647,
	"learning_rate": 2.8442739996615956e-07,
	"loss": 2.0465,
	"step": 4100
	},
	{
	"epoch": 0.9517335146159075,
	"grad_norm": 0.6662079691886902,
	"learning_rate": 1.3205455082128228e-07,
	"loss": 2.0419,
	"step": 4200
	},
	{
	"epoch": 0.9743938363924768,
	"grad_norm": 0.8761087656021118,
	"learning_rate": 3.7225418337528685e-08,
	"loss": 2.0664,
	"step": 4300
	},
	{
	"epoch": 0.997054158169046,
	"grad_norm": 1.0942589044570923,
	"learning_rate": 4.929869997571945e-10,
	"loss": 2.0249,
	"step": 4400
	},
	{
	"epoch": 1.0,
	"step": 4413,
	"total_flos": 8.01984399409152e+16,
	"train_loss": 2.1353629073560736,
	"train_runtime": 1379.8897,
	"train_samples_per_second": 6.396,
	"train_steps_per_second": 3.198
	}
	],
	"logging_steps": 100,
	"max_steps": 4413,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 8.01984399409152e+16,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}