{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.041928721174004195,
"grad_norm": 5.873931884765625,
"learning_rate": 3.333333333333333e-07,
"loss": 1.862,
"step": 5
},
{
"epoch": 0.08385744234800839,
"grad_norm": 6.9156107902526855,
"learning_rate": 7.5e-07,
"loss": 1.7854,
"step": 10
},
{
"epoch": 0.12578616352201258,
"grad_norm": 5.825167179107666,
"learning_rate": 1.1666666666666668e-06,
"loss": 1.5769,
"step": 15
},
{
"epoch": 0.16771488469601678,
"grad_norm": 4.851530075073242,
"learning_rate": 1.5833333333333331e-06,
"loss": 1.6161,
"step": 20
},
{
"epoch": 0.20964360587002095,
"grad_norm": 4.060953617095947,
"learning_rate": 2e-06,
"loss": 1.7598,
"step": 25
},
{
"epoch": 0.25157232704402516,
"grad_norm": 2.5847902297973633,
"learning_rate": 1.997356916700572e-06,
"loss": 1.3625,
"step": 30
},
{
"epoch": 0.29350104821802936,
"grad_norm": 3.8893041610717773,
"learning_rate": 1.9894416385809443e-06,
"loss": 1.4926,
"step": 35
},
{
"epoch": 0.33542976939203356,
"grad_norm": 3.7155327796936035,
"learning_rate": 1.976296007119933e-06,
"loss": 1.5143,
"step": 40
},
{
"epoch": 0.37735849056603776,
"grad_norm": 3.918890953063965,
"learning_rate": 1.9579895123154886e-06,
"loss": 1.5692,
"step": 45
},
{
"epoch": 0.4192872117400419,
"grad_norm": 5.501850128173828,
"learning_rate": 1.9346189253489886e-06,
"loss": 1.9754,
"step": 50
},
{
"epoch": 0.4612159329140461,
"grad_norm": 2.250485897064209,
"learning_rate": 1.9063077870366499e-06,
"loss": 1.5317,
"step": 55
},
{
"epoch": 0.5031446540880503,
"grad_norm": 4.778130531311035,
"learning_rate": 1.8732057547721957e-06,
"loss": 1.6225,
"step": 60
},
{
"epoch": 0.5450733752620545,
"grad_norm": 5.034369468688965,
"learning_rate": 1.8354878114129364e-06,
"loss": 1.9014,
"step": 65
},
{
"epoch": 0.5870020964360587,
"grad_norm": 3.4649276733398438,
"learning_rate": 1.7933533402912351e-06,
"loss": 1.5783,
"step": 70
},
{
"epoch": 0.6289308176100629,
"grad_norm": 4.354034900665283,
"learning_rate": 1.7470250712409959e-06,
"loss": 1.4057,
"step": 75
},
{
"epoch": 0.6708595387840671,
"grad_norm": 3.999058961868286,
"learning_rate": 1.6967479032106548e-06,
"loss": 1.5335,
"step": 80
},
{
"epoch": 0.7127882599580713,
"grad_norm": 3.4833269119262695,
"learning_rate": 1.6427876096865393e-06,
"loss": 1.1902,
"step": 85
},
{
"epoch": 0.7547169811320755,
"grad_norm": 2.8304104804992676,
"learning_rate": 1.5854294337699405e-06,
"loss": 1.2191,
"step": 90
},
{
"epoch": 0.7966457023060797,
"grad_norm": 4.565997123718262,
"learning_rate": 1.52497658033456e-06,
"loss": 1.4723,
"step": 95
},
{
"epoch": 0.8385744234800838,
"grad_norm": 3.578016996383667,
"learning_rate": 1.461748613235034e-06,
"loss": 1.0579,
"step": 100
},
{
"epoch": 0.8805031446540881,
"grad_norm": 4.267149925231934,
"learning_rate": 1.3960797660391568e-06,
"loss": 1.4263,
"step": 105
},
{
"epoch": 0.9224318658280922,
"grad_norm": 3.9493331909179688,
"learning_rate": 1.3283171752135611e-06,
"loss": 1.7828,
"step": 110
},
{
"epoch": 0.9643605870020965,
"grad_norm": 3.2271363735198975,
"learning_rate": 1.2588190451025207e-06,
"loss": 1.2667,
"step": 115
},
{
"epoch": 1.0,
"grad_norm": 12.765212059020996,
"learning_rate": 1.1879527544001117e-06,
"loss": 1.5189,
"step": 120
},
{
"epoch": 1.0419287211740043,
"grad_norm": 2.5988142490386963,
"learning_rate": 1.1160929141252301e-06,
"loss": 1.3693,
"step": 125
},
{
"epoch": 1.0838574423480083,
"grad_norm": 3.980133533477783,
"learning_rate": 1.043619387365336e-06,
"loss": 1.4213,
"step": 130
},
{
"epoch": 1.1257861635220126,
"grad_norm": 3.8095943927764893,
"learning_rate": 9.709152812568885e-07,
"loss": 1.2731,
"step": 135
},
{
"epoch": 1.1677148846960168,
"grad_norm": 4.488629341125488,
"learning_rate": 8.983649218171981e-07,
"loss": 1.4819,
"step": 140
},
{
"epoch": 1.209643605870021,
"grad_norm": 3.532264232635498,
"learning_rate": 8.263518223330696e-07,
"loss": 1.3564,
"step": 145
},
{
"epoch": 1.251572327044025,
"grad_norm": 4.543543815612793,
"learning_rate": 7.552566560456761e-07,
"loss": 1.2954,
"step": 150
},
{
"epoch": 1.2935010482180294,
"grad_norm": 3.2413134574890137,
"learning_rate": 6.854552438483865e-07,
"loss": 1.2842,
"step": 155
},
{
"epoch": 1.3354297693920336,
"grad_norm": 3.2103683948516846,
"learning_rate": 6.173165676349102e-07,
"loss": 1.2546,
"step": 160
},
{
"epoch": 1.3773584905660377,
"grad_norm": 2.752671957015991,
"learning_rate": 5.512008197995378e-07,
"loss": 1.2626,
"step": 165
},
{
"epoch": 1.419287211740042,
"grad_norm": 3.4775447845458984,
"learning_rate": 4.874574992001348e-07,
"loss": 1.2031,
"step": 170
},
{
"epoch": 1.4612159329140462,
"grad_norm": 4.845881938934326,
"learning_rate": 4.2642356364895417e-07,
"loss": 1.5397,
"step": 175
},
{
"epoch": 1.5031446540880502,
"grad_norm": 3.8967208862304688,
"learning_rate": 3.684216486975026e-07,
"loss": 1.4725,
"step": 180
},
{
"epoch": 1.5450733752620545,
"grad_norm": 3.4233365058898926,
"learning_rate": 3.137583621312665e-07,
"loss": 1.2935,
"step": 185
},
{
"epoch": 1.5870020964360587,
"grad_norm": 3.8637824058532715,
"learning_rate": 2.62722663189876e-07,
"loss": 1.2836,
"step": 190
},
{
"epoch": 1.6289308176100628,
"grad_norm": 4.068167209625244,
"learning_rate": 2.1558433508042427e-07,
"loss": 1.1832,
"step": 195
},
{
"epoch": 1.6708595387840672,
"grad_norm": 3.147029399871826,
"learning_rate": 1.7259255885848944e-07,
"loss": 1.2623,
"step": 200
},
{
"epoch": 1.7127882599580713,
"grad_norm": 2.986510992050171,
"learning_rate": 1.3397459621556128e-07,
"loss": 1.0893,
"step": 205
},
{
"epoch": 1.7547169811320755,
"grad_norm": 3.7319693565368652,
"learning_rate": 9.993458813587884e-08,
"loss": 1.2304,
"step": 210
},
{
"epoch": 1.7966457023060798,
"grad_norm": 4.113098621368408,
"learning_rate": 7.065247577317745e-08,
"loss": 1.1455,
"step": 215
},
{
"epoch": 1.8385744234800838,
"grad_norm": 4.701733589172363,
"learning_rate": 4.6283049251773176e-08,
"loss": 1.1915,
"step": 220
},
{
"epoch": 1.880503144654088,
"grad_norm": 3.9321887493133545,
"learning_rate": 2.6955129420176193e-08,
"loss": 1.2247,
"step": 225
},
{
"epoch": 1.9224318658280923,
"grad_norm": 3.6867830753326416,
"learning_rate": 1.2770886882625952e-08,
"loss": 1.2779,
"step": 230
},
{
"epoch": 1.9643605870020964,
"grad_norm": 2.951756000518799,
"learning_rate": 3.805301908254455e-09,
"loss": 1.2652,
"step": 235
},
{
"epoch": 2.0,
"grad_norm": 7.836752414703369,
"learning_rate": 1.0576807289253142e-10,
"loss": 1.4392,
"step": 240
}
],
"logging_steps": 5,
"max_steps": 240,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2077792876363776.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}