broadfield-dev's picture
Duplicate from broadfield-dev/savant_2_gsm8k_final
07aa2fb verified
{
"phase": "SFT",
"total_optimizer_steps": 80,
"metrics": [
{
"Step": 1,
"Metric": "SFT Loss",
"Value": 10.806696891784668
},
{
"Step": 1,
"Metric": "Learning Rate",
"Value": 4.0000000000000003e-07
},
{
"Step": 2,
"Metric": "SFT Loss",
"Value": 20.197086334228516
},
{
"Step": 2,
"Metric": "Learning Rate",
"Value": 8.000000000000001e-07
},
{
"Step": 3,
"Metric": "SFT Loss",
"Value": 13.669203758239746
},
{
"Step": 3,
"Metric": "Learning Rate",
"Value": 1.2000000000000002e-06
},
{
"Step": 4,
"Metric": "SFT Loss",
"Value": 19.52617835998535
},
{
"Step": 4,
"Metric": "Learning Rate",
"Value": 1.6000000000000001e-06
},
{
"Step": 5,
"Metric": "SFT Loss",
"Value": 14.46614933013916
},
{
"Step": 5,
"Metric": "Learning Rate",
"Value": 2.0000000000000003e-06
},
{
"Step": 6,
"Metric": "SFT Loss",
"Value": 11.709370613098145
},
{
"Step": 6,
"Metric": "Learning Rate",
"Value": 2.4000000000000003e-06
},
{
"Step": 7,
"Metric": "SFT Loss",
"Value": 14.79263687133789
},
{
"Step": 7,
"Metric": "Learning Rate",
"Value": 2.8000000000000003e-06
},
{
"Step": 8,
"Metric": "SFT Loss",
"Value": 16.564573287963867
},
{
"Step": 8,
"Metric": "Learning Rate",
"Value": 3.2000000000000003e-06
},
{
"Step": 9,
"Metric": "SFT Loss",
"Value": 16.437776565551758
},
{
"Step": 9,
"Metric": "Learning Rate",
"Value": 3.6000000000000003e-06
},
{
"Step": 10,
"Metric": "SFT Loss",
"Value": 14.215919494628906
},
{
"Step": 10,
"Metric": "Learning Rate",
"Value": 4.000000000000001e-06
},
{
"Step": 11,
"Metric": "SFT Loss",
"Value": 13.728559494018555
},
{
"Step": 11,
"Metric": "Learning Rate",
"Value": 4.4e-06
},
{
"Step": 12,
"Metric": "SFT Loss",
"Value": 17.120494842529297
},
{
"Step": 12,
"Metric": "Learning Rate",
"Value": 4.800000000000001e-06
},
{
"Step": 13,
"Metric": "SFT Loss",
"Value": 12.443873405456543
},
{
"Step": 13,
"Metric": "Learning Rate",
"Value": 5.2e-06
},
{
"Step": 14,
"Metric": "SFT Loss",
"Value": 10.322935104370117
},
{
"Step": 14,
"Metric": "Learning Rate",
"Value": 5.600000000000001e-06
},
{
"Step": 15,
"Metric": "SFT Loss",
"Value": 10.010440826416016
},
{
"Step": 15,
"Metric": "Learning Rate",
"Value": 6e-06
},
{
"Step": 16,
"Metric": "SFT Loss",
"Value": 9.959354400634766
},
{
"Step": 16,
"Metric": "Learning Rate",
"Value": 6.4000000000000006e-06
},
{
"Step": 17,
"Metric": "SFT Loss",
"Value": 9.42132568359375
},
{
"Step": 17,
"Metric": "Learning Rate",
"Value": 6.800000000000001e-06
},
{
"Step": 18,
"Metric": "SFT Loss",
"Value": 10.809995651245117
},
{
"Step": 18,
"Metric": "Learning Rate",
"Value": 7.2000000000000005e-06
},
{
"Step": 19,
"Metric": "SFT Loss",
"Value": 11.641499519348145
},
{
"Step": 19,
"Metric": "Learning Rate",
"Value": 7.600000000000001e-06
},
{
"Step": 20,
"Metric": "SFT Loss",
"Value": 8.137388229370117
},
{
"Step": 20,
"Metric": "Learning Rate",
"Value": 8.000000000000001e-06
},
{
"Step": 21,
"Metric": "SFT Loss",
"Value": 11.187762260437012
},
{
"Step": 21,
"Metric": "Learning Rate",
"Value": 8.400000000000001e-06
},
{
"Step": 22,
"Metric": "SFT Loss",
"Value": 6.696204662322998
},
{
"Step": 22,
"Metric": "Learning Rate",
"Value": 8.8e-06
},
{
"Step": 23,
"Metric": "SFT Loss",
"Value": 9.350967407226562
},
{
"Step": 23,
"Metric": "Learning Rate",
"Value": 9.200000000000002e-06
},
{
"Step": 24,
"Metric": "SFT Loss",
"Value": 7.601217269897461
},
{
"Step": 24,
"Metric": "Learning Rate",
"Value": 9.600000000000001e-06
},
{
"Step": 25,
"Metric": "SFT Loss",
"Value": 7.671919345855713
},
{
"Step": 25,
"Metric": "Learning Rate",
"Value": 1e-05
},
{
"Step": 26,
"Metric": "SFT Loss",
"Value": 5.797372341156006
},
{
"Step": 26,
"Metric": "Learning Rate",
"Value": 1.04e-05
},
{
"Step": 27,
"Metric": "SFT Loss",
"Value": 7.414616107940674
},
{
"Step": 27,
"Metric": "Learning Rate",
"Value": 1.0800000000000002e-05
},
{
"Step": 28,
"Metric": "SFT Loss",
"Value": 7.067437171936035
},
{
"Step": 28,
"Metric": "Learning Rate",
"Value": 1.1200000000000001e-05
},
{
"Step": 29,
"Metric": "SFT Loss",
"Value": 6.7167463302612305
},
{
"Step": 29,
"Metric": "Learning Rate",
"Value": 1.16e-05
},
{
"Step": 30,
"Metric": "SFT Loss",
"Value": 6.630039691925049
},
{
"Step": 30,
"Metric": "Learning Rate",
"Value": 1.2e-05
},
{
"Step": 31,
"Metric": "SFT Loss",
"Value": 6.276569366455078
},
{
"Step": 31,
"Metric": "Learning Rate",
"Value": 1.2400000000000002e-05
},
{
"Step": 32,
"Metric": "SFT Loss",
"Value": 7.236689567565918
},
{
"Step": 32,
"Metric": "Learning Rate",
"Value": 1.2800000000000001e-05
},
{
"Step": 33,
"Metric": "SFT Loss",
"Value": 6.829200267791748
},
{
"Step": 33,
"Metric": "Learning Rate",
"Value": 1.3200000000000002e-05
},
{
"Step": 34,
"Metric": "SFT Loss",
"Value": 6.348658561706543
},
{
"Step": 34,
"Metric": "Learning Rate",
"Value": 1.3600000000000002e-05
},
{
"Step": 35,
"Metric": "SFT Loss",
"Value": 6.114589691162109
},
{
"Step": 35,
"Metric": "Learning Rate",
"Value": 1.4e-05
},
{
"Step": 36,
"Metric": "SFT Loss",
"Value": 5.820864677429199
},
{
"Step": 36,
"Metric": "Learning Rate",
"Value": 1.4400000000000001e-05
},
{
"Step": 37,
"Metric": "SFT Loss",
"Value": 6.093341827392578
},
{
"Step": 37,
"Metric": "Learning Rate",
"Value": 1.48e-05
},
{
"Step": 38,
"Metric": "SFT Loss",
"Value": 6.195661544799805
},
{
"Step": 38,
"Metric": "Learning Rate",
"Value": 1.5200000000000002e-05
},
{
"Step": 39,
"Metric": "SFT Loss",
"Value": 7.604869365692139
},
{
"Step": 39,
"Metric": "Learning Rate",
"Value": 1.5600000000000003e-05
},
{
"Step": 40,
"Metric": "SFT Loss",
"Value": 5.295370101928711
},
{
"Step": 40,
"Metric": "Learning Rate",
"Value": 1.6000000000000003e-05
},
{
"Step": 41,
"Metric": "SFT Loss",
"Value": 6.263326644897461
},
{
"Step": 41,
"Metric": "Learning Rate",
"Value": 1.64e-05
},
{
"Step": 42,
"Metric": "SFT Loss",
"Value": 6.123704433441162
},
{
"Step": 42,
"Metric": "Learning Rate",
"Value": 1.6800000000000002e-05
},
{
"Step": 43,
"Metric": "SFT Loss",
"Value": 6.104567050933838
},
{
"Step": 43,
"Metric": "Learning Rate",
"Value": 1.72e-05
},
{
"Step": 44,
"Metric": "SFT Loss",
"Value": 6.757107257843018
},
{
"Step": 44,
"Metric": "Learning Rate",
"Value": 1.76e-05
},
{
"Step": 45,
"Metric": "SFT Loss",
"Value": 6.533822059631348
},
{
"Step": 45,
"Metric": "Learning Rate",
"Value": 1.8e-05
},
{
"Step": 46,
"Metric": "SFT Loss",
"Value": 6.507102012634277
},
{
"Step": 46,
"Metric": "Learning Rate",
"Value": 1.8400000000000003e-05
},
{
"Step": 47,
"Metric": "SFT Loss",
"Value": 6.377279758453369
},
{
"Step": 47,
"Metric": "Learning Rate",
"Value": 1.88e-05
},
{
"Step": 48,
"Metric": "SFT Loss",
"Value": 6.2640581130981445
},
{
"Step": 48,
"Metric": "Learning Rate",
"Value": 1.9200000000000003e-05
},
{
"Step": 49,
"Metric": "SFT Loss",
"Value": 5.53227424621582
},
{
"Step": 49,
"Metric": "Learning Rate",
"Value": 1.9600000000000002e-05
},
{
"Step": 50,
"Metric": "SFT Loss",
"Value": 6.743251800537109
},
{
"Step": 50,
"Metric": "Learning Rate",
"Value": 2e-05
},
{
"Step": 51,
"Metric": "SFT Loss",
"Value": 5.5736985206604
},
{
"Step": 51,
"Metric": "Learning Rate",
"Value": 1.9955555555555557e-05
},
{
"Step": 52,
"Metric": "SFT Loss",
"Value": 5.360650539398193
},
{
"Step": 52,
"Metric": "Learning Rate",
"Value": 1.9911111111111112e-05
},
{
"Step": 53,
"Metric": "SFT Loss",
"Value": 5.423285484313965
},
{
"Step": 53,
"Metric": "Learning Rate",
"Value": 1.9866666666666667e-05
},
{
"Step": 54,
"Metric": "SFT Loss",
"Value": 5.108491897583008
},
{
"Step": 54,
"Metric": "Learning Rate",
"Value": 1.9822222222222226e-05
},
{
"Step": 55,
"Metric": "SFT Loss",
"Value": 4.632509231567383
},
{
"Step": 55,
"Metric": "Learning Rate",
"Value": 1.977777777777778e-05
},
{
"Step": 56,
"Metric": "SFT Loss",
"Value": 4.9901933670043945
},
{
"Step": 56,
"Metric": "Learning Rate",
"Value": 1.9733333333333336e-05
},
{
"Step": 57,
"Metric": "SFT Loss",
"Value": 4.968444347381592
},
{
"Step": 57,
"Metric": "Learning Rate",
"Value": 1.968888888888889e-05
},
{
"Step": 58,
"Metric": "SFT Loss",
"Value": 5.028273582458496
},
{
"Step": 58,
"Metric": "Learning Rate",
"Value": 1.9644444444444447e-05
},
{
"Step": 59,
"Metric": "SFT Loss",
"Value": 5.093855381011963
},
{
"Step": 59,
"Metric": "Learning Rate",
"Value": 1.9600000000000002e-05
},
{
"Step": 60,
"Metric": "SFT Loss",
"Value": 4.720518112182617
},
{
"Step": 60,
"Metric": "Learning Rate",
"Value": 1.9555555555555557e-05
},
{
"Step": 61,
"Metric": "SFT Loss",
"Value": 4.872599124908447
},
{
"Step": 61,
"Metric": "Learning Rate",
"Value": 1.9511111111111113e-05
},
{
"Step": 62,
"Metric": "SFT Loss",
"Value": 4.398301124572754
},
{
"Step": 62,
"Metric": "Learning Rate",
"Value": 1.9466666666666668e-05
},
{
"Step": 63,
"Metric": "SFT Loss",
"Value": 5.259819030761719
},
{
"Step": 63,
"Metric": "Learning Rate",
"Value": 1.9422222222222223e-05
},
{
"Step": 64,
"Metric": "SFT Loss",
"Value": 5.536286354064941
},
{
"Step": 64,
"Metric": "Learning Rate",
"Value": 1.9377777777777778e-05
},
{
"Step": 65,
"Metric": "SFT Loss",
"Value": 4.998850345611572
},
{
"Step": 65,
"Metric": "Learning Rate",
"Value": 1.9333333333333333e-05
},
{
"Step": 66,
"Metric": "SFT Loss",
"Value": 4.723838806152344
},
{
"Step": 66,
"Metric": "Learning Rate",
"Value": 1.928888888888889e-05
},
{
"Step": 67,
"Metric": "SFT Loss",
"Value": 5.087211608886719
},
{
"Step": 67,
"Metric": "Learning Rate",
"Value": 1.9244444444444444e-05
},
{
"Step": 68,
"Metric": "SFT Loss",
"Value": 4.477473258972168
},
{
"Step": 68,
"Metric": "Learning Rate",
"Value": 1.9200000000000003e-05
},
{
"Step": 69,
"Metric": "SFT Loss",
"Value": 4.780407905578613
},
{
"Step": 69,
"Metric": "Learning Rate",
"Value": 1.9155555555555558e-05
},
{
"Step": 70,
"Metric": "SFT Loss",
"Value": 4.482248783111572
},
{
"Step": 70,
"Metric": "Learning Rate",
"Value": 1.9111111111111113e-05
},
{
"Step": 71,
"Metric": "SFT Loss",
"Value": 4.773695468902588
},
{
"Step": 71,
"Metric": "Learning Rate",
"Value": 1.9066666666666668e-05
},
{
"Step": 72,
"Metric": "SFT Loss",
"Value": 4.399933815002441
},
{
"Step": 72,
"Metric": "Learning Rate",
"Value": 1.9022222222222223e-05
},
{
"Step": 73,
"Metric": "SFT Loss",
"Value": 4.540095329284668
},
{
"Step": 73,
"Metric": "Learning Rate",
"Value": 1.897777777777778e-05
},
{
"Step": 74,
"Metric": "SFT Loss",
"Value": 4.6496663093566895
},
{
"Step": 74,
"Metric": "Learning Rate",
"Value": 1.8933333333333334e-05
},
{
"Step": 75,
"Metric": "SFT Loss",
"Value": 5.276064872741699
},
{
"Step": 75,
"Metric": "Learning Rate",
"Value": 1.888888888888889e-05
},
{
"Step": 76,
"Metric": "SFT Loss",
"Value": 4.346099853515625
},
{
"Step": 76,
"Metric": "Learning Rate",
"Value": 1.8844444444444444e-05
},
{
"Step": 77,
"Metric": "SFT Loss",
"Value": 4.5329694747924805
},
{
"Step": 77,
"Metric": "Learning Rate",
"Value": 1.88e-05
},
{
"Step": 78,
"Metric": "SFT Loss",
"Value": 4.541317939758301
},
{
"Step": 78,
"Metric": "Learning Rate",
"Value": 1.8755555555555558e-05
},
{
"Step": 79,
"Metric": "SFT Loss",
"Value": 4.832979202270508
},
{
"Step": 79,
"Metric": "Learning Rate",
"Value": 1.8711111111111113e-05
},
{
"Step": 80,
"Metric": "SFT Loss",
"Value": 4.471445560455322
},
{
"Step": 80,
"Metric": "Learning Rate",
"Value": 1.866666666666667e-05
}
],
"sft_complete": false
}