[
{
"epoch": 0.08,
"grad_norm": 0.28229808807373047,
"learning_rate": 0.0003304347826086957,
"loss": 1.0758,
"step": 20
},
{
"epoch": 0.16,
"grad_norm": 0.3910837173461914,
"learning_rate": 0.0003995221430894122,
"loss": 0.9148,
"step": 40
},
{
"epoch": 0.24,
"grad_norm": 0.37663960456848145,
"learning_rate": 0.00039758476229578745,
"loss": 0.8888,
"step": 60
},
{
"epoch": 0.32,
"grad_norm": 0.2441498339176178,
"learning_rate": 0.0003941724426452488,
"loss": 0.8392,
"step": 80
},
{
"epoch": 0.4,
"grad_norm": 0.32805338501930237,
"learning_rate": 0.0003893106565618147,
"loss": 0.8178,
"step": 100
},
{
"epoch": 0.4,
"eval_loss": 0.8086594939231873,
"eval_runtime": 19.7643,
"eval_samples_per_second": 25.298,
"eval_steps_per_second": 3.188,
"step": 100
},
{
"epoch": 0.48,
"grad_norm": 0.32320863008499146,
"learning_rate": 0.0003830356965061241,
"loss": 0.7901,
"step": 120
},
{
"epoch": 0.56,
"grad_norm": 0.35026517510414124,
"learning_rate": 0.0003753944040579839,
"loss": 0.7661,
"step": 140
},
{
"epoch": 0.64,
"grad_norm": 0.3722197115421295,
"learning_rate": 0.00036644382025141837,
"loss": 0.7126,
"step": 160
},
{
"epoch": 0.72,
"grad_norm": 0.4616721570491791,
"learning_rate": 0.0003562507597724135,
"loss": 0.6517,
"step": 180
},
{
"epoch": 0.8,
"grad_norm": 0.40086525678634644,
"learning_rate": 0.0003448913121979015,
"loss": 0.6215,
"step": 200
},
{
"epoch": 0.8,
"eval_loss": 0.6392109394073486,
"eval_runtime": 19.766,
"eval_samples_per_second": 25.296,
"eval_steps_per_second": 3.187,
"step": 200
},
{
"epoch": 0.88,
"grad_norm": 0.4292043447494507,
"learning_rate": 0.00033245027399915895,
"loss": 0.5898,
"step": 220
},
{
"epoch": 0.96,
"grad_norm": 0.5536438226699829,
"learning_rate": 0.0003190205155496219,
"loss": 0.5709,
"step": 240
},
{
"epoch": 1.04,
"grad_norm": 0.6616698503494263,
"learning_rate": 0.00030470228786230405,
"loss": 0.4551,
"step": 260
},
{
"epoch": 1.12,
"grad_norm": 0.5763731598854065,
"learning_rate": 0.0002896024742319127,
"loss": 0.3672,
"step": 280
},
{
"epoch": 1.2,
"grad_norm": 0.5821401476860046,
"learning_rate": 0.0002738337923680367,
"loss": 0.3648,
"step": 300
},
{
"epoch": 1.2,
"eval_loss": 0.4687094986438751,
"eval_runtime": 19.7698,
"eval_samples_per_second": 25.291,
"eval_steps_per_second": 3.187,
"step": 300
},
{
"epoch": 1.28,
"grad_norm": 0.47784799337387085,
"learning_rate": 0.00025751395297535327,
"loss": 0.3424,
"step": 320
},
{
"epoch": 1.3599999999999999,
"grad_norm": 0.5968295931816101,
"learning_rate": 0.00024076478106192076,
"loss": 0.3253,
"step": 340
},
{
"epoch": 1.44,
"grad_norm": 0.5316683053970337,
"learning_rate": 0.00022371130653484945,
"loss": 0.2952,
"step": 360
},
{
"epoch": 1.52,
"grad_norm": 0.4502660930156708,
"learning_rate": 0.0002064808308719107,
"loss": 0.2728,
"step": 380
},
{
"epoch": 1.6,
"grad_norm": 0.512885570526123,
"learning_rate": 0.00018920197683623203,
"loss": 0.2615,
"step": 400
},
{
"epoch": 1.6,
"eval_loss": 0.3294866383075714,
"eval_runtime": 19.7741,
"eval_samples_per_second": 25.286,
"eval_steps_per_second": 3.186,
"step": 400
},
{
"epoch": 1.6800000000000002,
"grad_norm": 0.510636568069458,
"learning_rate": 0.00017200372832780684,
"loss": 0.2468,
"step": 420
},
{
"epoch": 1.76,
"grad_norm": 0.38415294885635376,
"learning_rate": 0.00015501446753917467,
"loss": 0.2153,
"step": 440
},
{
"epoch": 1.8399999999999999,
"grad_norm": 0.4481910765171051,
"learning_rate": 0.00013836101660275217,
"loss": 0.1996,
"step": 460
},
{
"epoch": 1.92,
"grad_norm": 0.516516387462616,
"learning_rate": 0.000122167690883765,
"loss": 0.1803,
"step": 480
},
{
"epoch": 2.0,
"grad_norm": 0.5795238018035889,
"learning_rate": 0.00010655537098579868,
"loss": 0.1915,
"step": 500
},
{
"epoch": 2.0,
"eval_loss": 0.2246081531047821,
"eval_runtime": 19.7628,
"eval_samples_per_second": 25.3,
"eval_steps_per_second": 3.188,
"step": 500
},
{
"epoch": 2.08,
"grad_norm": 0.46038225293159485,
"learning_rate": 9.164060039629896e-05,
"loss": 0.1179,
"step": 520
},
{
"epoch": 2.16,
"grad_norm": 0.43895432353019714,
"learning_rate": 7.753471550795519e-05,
"loss": 0.1153,
"step": 540
},
{
"epoch": 2.24,
"grad_norm": 0.7098507285118103,
"learning_rate": 6.434301451021892e-05,
"loss": 0.1261,
"step": 560
},
{
"epoch": 2.32,
"grad_norm": 0.3989202082157135,
"learning_rate": 5.216397135505024e-05,
"loss": 0.1121,
"step": 580
},
{
"epoch": 2.4,
"grad_norm": 0.525729775428772,
"learning_rate": 4.108850066451255e-05,
"loss": 0.1186,
"step": 600
},
{
"epoch": 2.4,
"eval_loss": 0.19236330687999725,
"eval_runtime": 19.7599,
"eval_samples_per_second": 25.304,
"eval_steps_per_second": 3.188,
"step": 600
},
{
"epoch": 2.48,
"grad_norm": 0.33528250455856323,
"learning_rate": 3.1199279067563706e-05,
"loss": 0.1075,
"step": 620
},
{
"epoch": 2.56,
"grad_norm": 0.35482296347618103,
"learning_rate": 2.2570128032157568e-05,
"loss": 0.1106,
"step": 640
},
{
"epoch": 2.64,
"grad_norm": 0.2427404522895813,
"learning_rate": 1.526546279971466e-05,
"loss": 0.0932,
"step": 660
},
{
"epoch": 2.7199999999999998,
"grad_norm": 0.27014562487602234,
"learning_rate": 9.339811535579768e-06,
"loss": 0.1005,
"step": 680
},
{
"epoch": 2.8,
"grad_norm": 0.3576093018054962,
"learning_rate": 4.837408284931444e-06,
"loss": 0.0948,
"step": 700
},
{
"epoch": 2.8,
"eval_loss": 0.17951039969921112,
"eval_runtime": 19.7723,
"eval_samples_per_second": 25.288,
"eval_steps_per_second": 3.186,
"step": 700
},
{
"epoch": 2.88,
"grad_norm": 0.3262103199958801,
"learning_rate": 1.7918627726630777e-06,
"loss": 0.0939,
"step": 720
},
{
"epoch": 2.96,
"grad_norm": 0.2655605971813202,
"learning_rate": 2.259095121265542e-07,
"loss": 0.0953,
"step": 740
}
]