{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.2125069175428886,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05534034311012728,
      "grad_norm": 0.7568074464797974,
      "learning_rate": 9.8e-05,
      "loss": 1.0759,
      "step": 50
    },
    {
      "epoch": 0.11068068622025456,
      "grad_norm": 0.14674387872219086,
      "learning_rate": 0.00019800000000000002,
      "loss": 0.1883,
      "step": 100
    },
    {
      "epoch": 0.16602102933038185,
      "grad_norm": 0.13491740822792053,
      "learning_rate": 0.0001998263839556516,
      "loss": 0.1823,
      "step": 150
    },
    {
      "epoch": 0.22136137244050913,
      "grad_norm": 0.11964733898639679,
      "learning_rate": 0.00019929192281085555,
      "loss": 0.1812,
      "step": 200
    },
    {
      "epoch": 0.27670171555063644,
      "grad_norm": 0.14810685813426971,
      "learning_rate": 0.0001983984765530473,
      "loss": 0.181,
      "step": 250
    },
    {
      "epoch": 0.3320420586607637,
      "grad_norm": 0.11917376518249512,
      "learning_rate": 0.0001971492753936756,
      "loss": 0.1801,
      "step": 300
    },
    {
      "epoch": 0.387382401770891,
      "grad_norm": 0.11992548406124115,
      "learning_rate": 0.0001955488357587162,
      "loss": 0.1777,
      "step": 350
    },
    {
      "epoch": 0.44272274488101826,
      "grad_norm": 0.09655219316482544,
      "learning_rate": 0.00019360294395975392,
      "loss": 0.1778,
      "step": 400
    },
    {
      "epoch": 0.49806308799114557,
      "grad_norm": 0.08890487998723984,
      "learning_rate": 0.00019131863527385433,
      "loss": 0.1776,
      "step": 450
    },
    {
      "epoch": 0.5534034311012729,
      "grad_norm": 0.09338568150997162,
      "learning_rate": 0.0001887041685078625,
      "loss": 0.1756,
      "step": 500
    },
    {
      "epoch": 0.6087437742114001,
      "grad_norm": 0.10447146743535995,
      "learning_rate": 0.0001857689961390886,
      "loss": 0.1769,
      "step": 550
    },
    {
      "epoch": 0.6640841173215274,
      "grad_norm": 0.08940507471561432,
      "learning_rate": 0.00018252373014033646,
      "loss": 0.1767,
      "step": 600
    },
    {
      "epoch": 0.7194244604316546,
      "grad_norm": 0.1015540286898613,
      "learning_rate": 0.0001789801036128327,
      "loss": 0.1749,
      "step": 650
    },
    {
      "epoch": 0.774764803541782,
      "grad_norm": 0.0866508036851883,
      "learning_rate": 0.0001751509283657702,
      "loss": 0.1765,
      "step": 700
    },
    {
      "epoch": 0.8301051466519093,
      "grad_norm": 0.08853679150342941,
      "learning_rate": 0.00017105004859583578,
      "loss": 0.1757,
      "step": 750
    },
    {
      "epoch": 0.8854454897620365,
      "grad_norm": 0.09371698647737503,
      "learning_rate": 0.00016669229083419114,
      "loss": 0.1766,
      "step": 800
    },
    {
      "epoch": 0.9407858328721638,
      "grad_norm": 0.8600781559944153,
      "learning_rate": 0.00016209341034187125,
      "loss": 0.342,
      "step": 850
    },
    {
      "epoch": 0.9961261759822911,
      "grad_norm": 0.0968112051486969,
      "learning_rate": 0.00015727003414740492,
      "loss": 0.1816,
      "step": 900
    },
    {
      "epoch": 1.0509131156613172,
      "grad_norm": 0.092947818338871,
      "learning_rate": 0.00015223960093260294,
      "loss": 0.1753,
      "step": 950
    },
    {
      "epoch": 1.1062534587714443,
      "grad_norm": 0.07544200122356415,
      "learning_rate": 0.00014702029798385264,
      "loss": 0.1747,
      "step": 1000
    },
    {
      "epoch": 1.1615938018815717,
      "grad_norm": 0.06907663494348526,
      "learning_rate": 0.00014163099543686964,
      "loss": 0.1741,
      "step": 1050
    },
    {
      "epoch": 1.2169341449916988,
      "grad_norm": 0.07450341433286667,
      "learning_rate": 0.00013609117805264063,
      "loss": 0.1754,
      "step": 1100
    },
    {
      "epoch": 1.2722744881018262,
      "grad_norm": 0.09569600224494934,
      "learning_rate": 0.0001304208747712189,
      "loss": 0.1732,
      "step": 1150
    },
    {
      "epoch": 1.3276148312119536,
      "grad_norm": 0.08210264891386032,
      "learning_rate": 0.00012464058629806633,
      "loss": 0.1716,
      "step": 1200
    },
    {
      "epoch": 1.3829551743220807,
      "grad_norm": 0.06857864558696747,
      "learning_rate": 0.00011877121098475106,
      "loss": 0.1728,
      "step": 1250
    },
    {
      "epoch": 1.438295517432208,
      "grad_norm": 0.11595187336206436,
      "learning_rate": 0.00011283396927197472,
      "loss": 0.174,
      "step": 1300
    },
    {
      "epoch": 1.4936358605423354,
      "grad_norm": 0.07529956847429276,
      "learning_rate": 0.00010685032696810226,
      "loss": 0.1733,
      "step": 1350
    },
    {
      "epoch": 1.5489762036524626,
      "grad_norm": 0.0644264817237854,
      "learning_rate": 0.00010084191764057676,
      "loss": 0.1738,
      "step": 1400
    },
    {
      "epoch": 1.60431654676259,
      "grad_norm": 0.060298092663288116,
      "learning_rate": 9.483046440080949e-05,
      "loss": 0.1717,
      "step": 1450
    },
    {
      "epoch": 1.6596568898727173,
      "grad_norm": 0.059512991458177567,
      "learning_rate": 8.883770136532834e-05,
      "loss": 0.1735,
      "step": 1500
    },
    {
      "epoch": 1.7149972329828445,
      "grad_norm": 0.0744907408952713,
      "learning_rate": 8.288529507713752e-05,
      "loss": 0.1722,
      "step": 1550
    },
    {
      "epoch": 1.7703375760929718,
      "grad_norm": 0.05729057267308235,
      "learning_rate": 7.699476617138598e-05,
      "loss": 0.1728,
      "step": 1600
    },
    {
      "epoch": 1.8256779192030992,
      "grad_norm": 0.06146302446722984,
      "learning_rate": 7.118741156855904e-05,
      "loss": 0.1714,
      "step": 1650
    },
    {
      "epoch": 1.8810182623132263,
      "grad_norm": 0.08402097970247269,
      "learning_rate": 6.548422747649902e-05,
      "loss": 0.1711,
      "step": 1700
    },
    {
      "epoch": 1.9363586054233535,
      "grad_norm": 0.06482277065515518,
      "learning_rate": 5.990583347963793e-05,
      "loss": 0.1714,
      "step": 1750
    },
    {
      "epoch": 1.991698948533481,
      "grad_norm": 0.06089329719543457,
      "learning_rate": 5.44723979898939e-05,
      "loss": 0.1711,
      "step": 1800
    },
    {
      "epoch": 2.0464858882125068,
      "grad_norm": 0.053777534514665604,
      "learning_rate": 4.9203565328759604e-05,
      "loss": 0.1708,
      "step": 1850
    },
    {
      "epoch": 2.1018262313226344,
      "grad_norm": 0.052074234932661057,
      "learning_rate": 4.411838470421454e-05,
      "loss": 0.169,
      "step": 1900
    },
    {
      "epoch": 2.1571665744327615,
      "grad_norm": 0.04878537356853485,
      "learning_rate": 3.923524133924069e-05,
      "loss": 0.1705,
      "step": 1950
    },
    {
      "epoch": 2.2125069175428886,
      "grad_norm": 0.06310058385133743,
      "learning_rate": 3.4571790000943973e-05,
      "loss": 0.1706,
      "step": 2000
    }
  ],
  "logging_steps": 50,
  "max_steps": 2712,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.341042688193413e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}