{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.041928721174004195,
"grad_norm": 5.873931884765625,
"learning_rate": 3.333333333333333e-07,
"loss": 1.862,
"step": 5
},
{
"epoch": 0.08385744234800839,
"grad_norm": 6.9156107902526855,
"learning_rate": 7.5e-07,
"loss": 1.7854,
"step": 10
},
{
"epoch": 0.12578616352201258,
"grad_norm": 5.825167179107666,
"learning_rate": 1.1666666666666668e-06,
"loss": 1.5769,
"step": 15
},
{
"epoch": 0.16771488469601678,
"grad_norm": 4.851530075073242,
"learning_rate": 1.5833333333333331e-06,
"loss": 1.6161,
"step": 20
},
{
"epoch": 0.20964360587002095,
"grad_norm": 4.060953617095947,
"learning_rate": 2e-06,
"loss": 1.7598,
"step": 25
},
{
"epoch": 0.25157232704402516,
"grad_norm": 2.5847902297973633,
"learning_rate": 1.997356916700572e-06,
"loss": 1.3625,
"step": 30
},
{
"epoch": 0.29350104821802936,
"grad_norm": 3.8893041610717773,
"learning_rate": 1.9894416385809443e-06,
"loss": 1.4926,
"step": 35
},
{
"epoch": 0.33542976939203356,
"grad_norm": 3.7155327796936035,
"learning_rate": 1.976296007119933e-06,
"loss": 1.5143,
"step": 40
},
{
"epoch": 0.37735849056603776,
"grad_norm": 3.918890953063965,
"learning_rate": 1.9579895123154886e-06,
"loss": 1.5692,
"step": 45
},
{
"epoch": 0.4192872117400419,
"grad_norm": 5.501850128173828,
"learning_rate": 1.9346189253489886e-06,
"loss": 1.9754,
"step": 50
},
{
"epoch": 0.4612159329140461,
"grad_norm": 2.250485897064209,
"learning_rate": 1.9063077870366499e-06,
"loss": 1.5317,
"step": 55
},
{
"epoch": 0.5031446540880503,
"grad_norm": 4.778130531311035,
"learning_rate": 1.8732057547721957e-06,
"loss": 1.6225,
"step": 60
},
{
"epoch": 0.5450733752620545,
"grad_norm": 5.034369468688965,
"learning_rate": 1.8354878114129364e-06,
"loss": 1.9014,
"step": 65
},
{
"epoch": 0.5870020964360587,
"grad_norm": 3.4649276733398438,
"learning_rate": 1.7933533402912351e-06,
"loss": 1.5783,
"step": 70
},
{
"epoch": 0.6289308176100629,
"grad_norm": 4.354034900665283,
"learning_rate": 1.7470250712409959e-06,
"loss": 1.4057,
"step": 75
},
{
"epoch": 0.6708595387840671,
"grad_norm": 3.999058961868286,
"learning_rate": 1.6967479032106548e-06,
"loss": 1.5335,
"step": 80
},
{
"epoch": 0.7127882599580713,
"grad_norm": 3.4833269119262695,
"learning_rate": 1.6427876096865393e-06,
"loss": 1.1902,
"step": 85
},
{
"epoch": 0.7547169811320755,
"grad_norm": 2.8304104804992676,
"learning_rate": 1.5854294337699405e-06,
"loss": 1.2191,
"step": 90
},
{
"epoch": 0.7966457023060797,
"grad_norm": 4.565997123718262,
"learning_rate": 1.52497658033456e-06,
"loss": 1.4723,
"step": 95
},
{
"epoch": 0.8385744234800838,
"grad_norm": 3.578016996383667,
"learning_rate": 1.461748613235034e-06,
"loss": 1.0579,
"step": 100
},
{
"epoch": 0.8805031446540881,
"grad_norm": 4.267149925231934,
"learning_rate": 1.3960797660391568e-06,
"loss": 1.4263,
"step": 105
},
{
"epoch": 0.9224318658280922,
"grad_norm": 3.9493331909179688,
"learning_rate": 1.3283171752135611e-06,
"loss": 1.7828,
"step": 110
},
{
"epoch": 0.9643605870020965,
"grad_norm": 3.2271363735198975,
"learning_rate": 1.2588190451025207e-06,
"loss": 1.2667,
"step": 115
},
{
"epoch": 1.0,
"grad_norm": 12.765212059020996,
"learning_rate": 1.1879527544001117e-06,
"loss": 1.5189,
"step": 120
},
{
"epoch": 1.0419287211740043,
"grad_norm": 2.5988142490386963,
"learning_rate": 1.1160929141252301e-06,
"loss": 1.3693,
"step": 125
},
{
"epoch": 1.0838574423480083,
"grad_norm": 3.980133533477783,
"learning_rate": 1.043619387365336e-06,
"loss": 1.4213,
"step": 130
},
{
"epoch": 1.1257861635220126,
"grad_norm": 3.8095943927764893,
"learning_rate": 9.709152812568885e-07,
"loss": 1.2731,
"step": 135
},
{
"epoch": 1.1677148846960168,
"grad_norm": 4.488629341125488,
"learning_rate": 8.983649218171981e-07,
"loss": 1.4819,
"step": 140
},
{
"epoch": 1.209643605870021,
"grad_norm": 3.532264232635498,
"learning_rate": 8.263518223330696e-07,
"loss": 1.3564,
"step": 145
},
{
"epoch": 1.251572327044025,
"grad_norm": 4.543543815612793,
"learning_rate": 7.552566560456761e-07,
"loss": 1.2954,
"step": 150
},
{
"epoch": 1.2935010482180294,
"grad_norm": 3.2413134574890137,
"learning_rate": 6.854552438483865e-07,
"loss": 1.2842,
"step": 155
},
{
"epoch": 1.3354297693920336,
"grad_norm": 3.2103683948516846,
"learning_rate": 6.173165676349102e-07,
"loss": 1.2546,
"step": 160
},
{
"epoch": 1.3773584905660377,
"grad_norm": 2.752671957015991,
"learning_rate": 5.512008197995378e-07,
"loss": 1.2626,
"step": 165
},
{
"epoch": 1.419287211740042,
"grad_norm": 3.4775447845458984,
"learning_rate": 4.874574992001348e-07,
"loss": 1.2031,
"step": 170
},
{
"epoch": 1.4612159329140462,
"grad_norm": 4.845881938934326,
"learning_rate": 4.2642356364895417e-07,
"loss": 1.5397,
"step": 175
},
{
"epoch": 1.5031446540880502,
"grad_norm": 3.8967208862304688,
"learning_rate": 3.684216486975026e-07,
"loss": 1.4725,
"step": 180
},
{
"epoch": 1.5450733752620545,
"grad_norm": 3.4233365058898926,
"learning_rate": 3.137583621312665e-07,
"loss": 1.2935,
"step": 185
},
{
"epoch": 1.5870020964360587,
"grad_norm": 3.8637824058532715,
"learning_rate": 2.62722663189876e-07,
"loss": 1.2836,
"step": 190
},
{
"epoch": 1.6289308176100628,
"grad_norm": 4.068167209625244,
"learning_rate": 2.1558433508042427e-07,
"loss": 1.1832,
"step": 195
},
{
"epoch": 1.6708595387840672,
"grad_norm": 3.147029399871826,
"learning_rate": 1.7259255885848944e-07,
"loss": 1.2623,
"step": 200
},
{
"epoch": 1.7127882599580713,
"grad_norm": 2.986510992050171,
"learning_rate": 1.3397459621556128e-07,
"loss": 1.0893,
"step": 205
},
{
"epoch": 1.7547169811320755,
"grad_norm": 3.7319693565368652,
"learning_rate": 9.993458813587884e-08,
"loss": 1.2304,
"step": 210
},
{
"epoch": 1.7966457023060798,
"grad_norm": 4.113098621368408,
"learning_rate": 7.065247577317745e-08,
"loss": 1.1455,
"step": 215
},
{
"epoch": 1.8385744234800838,
"grad_norm": 4.701733589172363,
"learning_rate": 4.6283049251773176e-08,
"loss": 1.1915,
"step": 220
},
{
"epoch": 1.880503144654088,
"grad_norm": 3.9321887493133545,
"learning_rate": 2.6955129420176193e-08,
"loss": 1.2247,
"step": 225
},
{
"epoch": 1.9224318658280923,
"grad_norm": 3.6867830753326416,
"learning_rate": 1.2770886882625952e-08,
"loss": 1.2779,
"step": 230
},
{
"epoch": 1.9643605870020964,
"grad_norm": 2.951756000518799,
"learning_rate": 3.805301908254455e-09,
"loss": 1.2652,
"step": 235
},
{
"epoch": 2.0,
"grad_norm": 7.836752414703369,
"learning_rate": 1.0576807289253142e-10,
"loss": 1.4392,
"step": 240
}
],
"logging_steps": 5,
"max_steps": 240,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2077792876363776.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}