sdlc-slm-5m / training_log.json
sathishphdai's picture
Upload SDLC-SLM v2: LLaMA-style with RoPE (1M context)
f024a6d verified
[
{
"step": 50,
"epoch": 1,
"train_loss": 7.011437892913818,
"eval_loss": 7.229861998558045
},
{
"step": 100,
"epoch": 1,
"train_loss": 6.745733737945557,
"eval_loss": 6.7441881656646725
},
{
"step": 150,
"epoch": 1,
"train_loss": 6.201735496520996,
"eval_loss": 6.213063597679138
},
{
"step": 200,
"epoch": 1,
"train_loss": 6.189537048339844,
"eval_loss": 6.045387864112854
},
{
"step": 250,
"epoch": 1,
"train_loss": 5.690642833709717,
"eval_loss": 5.645134687423706
},
{
"step": 300,
"epoch": 1,
"train_loss": 5.6198320388793945,
"eval_loss": 5.313832426071167
},
{
"step": 350,
"epoch": 2,
"train_loss": 5.608547210693359,
"eval_loss": 5.363248491287232
},
{
"step": 400,
"epoch": 2,
"train_loss": 5.0405378341674805,
"eval_loss": 4.881580257415772
},
{
"step": 450,
"epoch": 2,
"train_loss": 5.173409938812256,
"eval_loss": 4.902361559867859
},
{
"step": 500,
"epoch": 2,
"train_loss": 5.284115791320801,
"eval_loss": 4.773967957496643
},
{
"step": 550,
"epoch": 2,
"train_loss": 4.780450820922852,
"eval_loss": 4.657914161682129
},
{
"step": 600,
"epoch": 2,
"train_loss": 4.791866302490234,
"eval_loss": 4.521592164039612
},
{
"step": 650,
"epoch": 3,
"train_loss": 4.705374717712402,
"eval_loss": 4.517299568653106
},
{
"step": 700,
"epoch": 3,
"train_loss": 4.529109001159668,
"eval_loss": 4.361143231391907
},
{
"step": 750,
"epoch": 3,
"train_loss": 4.330467224121094,
"eval_loss": 3.98987900018692
},
{
"step": 800,
"epoch": 3,
"train_loss": 4.7155442237854,
"eval_loss": 3.9433867573738097
},
{
"step": 850,
"epoch": 3,
"train_loss": 4.351643085479736,
"eval_loss": 3.800583028793335
},
{
"step": 900,
"epoch": 3,
"train_loss": 4.464141845703125,
"eval_loss": 4.003297185897827
},
{
"step": 950,
"epoch": 4,
"train_loss": 4.136284828186035,
"eval_loss": 3.77255722284317
},
{
"step": 1000,
"epoch": 4,
"train_loss": 3.2864787578582764,
"eval_loss": 3.586808240413666
},
{
"step": 1050,
"epoch": 4,
"train_loss": 3.9320664405822754,
"eval_loss": 3.472767782211304
},
{
"step": 1100,
"epoch": 4,
"train_loss": 3.6474409103393555,
"eval_loss": 3.361958682537079
},
{
"step": 1150,
"epoch": 4,
"train_loss": 3.7754194736480713,
"eval_loss": 3.4830828666687013
},
{
"step": 1200,
"epoch": 4,
"train_loss": 3.6011319160461426,
"eval_loss": 3.1169363617897035
},
{
"step": 1250,
"epoch": 5,
"train_loss": 3.0961451530456543,
"eval_loss": 3.1844743490219116
},
{
"step": 1300,
"epoch": 5,
"train_loss": 2.97489595413208,
"eval_loss": 2.998428153991699
},
{
"step": 1350,
"epoch": 5,
"train_loss": 3.4356741905212402,
"eval_loss": 2.805982267856598
},
{
"step": 1400,
"epoch": 5,
"train_loss": 3.468571186065674,
"eval_loss": 2.6748260080814363
},
{
"step": 1450,
"epoch": 5,
"train_loss": 2.341953992843628,
"eval_loss": 2.145216536521912
},
{
"step": 1500,
"epoch": 5,
"train_loss": 3.089789867401123,
"eval_loss": 2.3635078251361845
},
{
"step": 1550,
"epoch": 6,
"train_loss": 2.189382791519165,
"eval_loss": 2.2221537292003632
},
{
"step": 1600,
"epoch": 6,
"train_loss": 2.726090908050537,
"eval_loss": 2.1547133922576904
},
{
"step": 1650,
"epoch": 6,
"train_loss": 2.9023475646972656,
"eval_loss": 1.9779397070407867
},
{
"step": 1700,
"epoch": 6,
"train_loss": 2.1860427856445312,
"eval_loss": 2.561609756946564
},
{
"step": 1750,
"epoch": 6,
"train_loss": 2.254507064819336,
"eval_loss": 1.928052657842636
},
{
"step": 1800,
"epoch": 6,
"train_loss": 2.4110028743743896,
"eval_loss": 1.6561946600675583
},
{
"step": 1850,
"epoch": 7,
"train_loss": 1.9080724716186523,
"eval_loss": 1.5860632449388503
},
{
"step": 1900,
"epoch": 7,
"train_loss": 2.4111766815185547,
"eval_loss": 1.7199637949466706
},
{
"step": 1950,
"epoch": 7,
"train_loss": 1.6637767553329468,
"eval_loss": 1.5440789580345153
},
{
"step": 2000,
"epoch": 7,
"train_loss": 1.2018718719482422,
"eval_loss": 1.2651137679815292
},
{
"step": 2050,
"epoch": 7,
"train_loss": 2.2152888774871826,
"eval_loss": 1.4495389491319657
},
{
"step": 2100,
"epoch": 7,
"train_loss": 2.2379002571105957,
"eval_loss": 1.4080950930714606
},
{
"step": 2150,
"epoch": 7,
"train_loss": 1.4027272462844849,
"eval_loss": 1.1051764652132987
},
{
"step": 2200,
"epoch": 8,
"train_loss": 1.6132153272628784,
"eval_loss": 1.102821797132492
},
{
"step": 2250,
"epoch": 8,
"train_loss": 1.8127695322036743,
"eval_loss": 1.1629648685455323
},
{
"step": 2300,
"epoch": 8,
"train_loss": 1.5062594413757324,
"eval_loss": 1.074837401509285
},
{
"step": 2350,
"epoch": 8,
"train_loss": 1.83341383934021,
"eval_loss": 0.7862243682146073
},
{
"step": 2400,
"epoch": 8,
"train_loss": 2.754513740539551,
"eval_loss": 0.9054330654442311
},
{
"step": 2450,
"epoch": 8,
"train_loss": 2.325956344604492,
"eval_loss": 0.7618165738880635
},
{
"step": 2500,
"epoch": 9,
"train_loss": 1.8084752559661865,
"eval_loss": 0.8403361894190311
},
{
"step": 2550,
"epoch": 9,
"train_loss": 0.8731088042259216,
"eval_loss": 0.8205475762486458
},
{
"step": 2600,
"epoch": 9,
"train_loss": 1.7206931114196777,
"eval_loss": 0.947148085385561
},
{
"step": 2650,
"epoch": 9,
"train_loss": 1.1028999090194702,
"eval_loss": 0.8842812821269035
},
{
"step": 2700,
"epoch": 9,
"train_loss": 1.0300450325012207,
"eval_loss": 0.9720453128218651
},
{
"step": 2750,
"epoch": 9,
"train_loss": 0.734760046005249,
"eval_loss": 0.7927527688443661
},
{
"step": 2800,
"epoch": 10,
"train_loss": 1.3911309242248535,
"eval_loss": 1.0234219774603843
},
{
"step": 2850,
"epoch": 10,
"train_loss": 0.6058249473571777,
"eval_loss": 0.538704963028431
},
{
"step": 2900,
"epoch": 10,
"train_loss": 1.1464016437530518,
"eval_loss": 0.6425696827471257
},
{
"step": 2950,
"epoch": 10,
"train_loss": 0.6506685614585876,
"eval_loss": 0.8487898081541061
},
{
"step": 3000,
"epoch": 10,
"train_loss": 0.5876604318618774,
"eval_loss": 0.9119798105210066
},
{
"step": 3050,
"epoch": 10,
"train_loss": 0.9894850254058838,
"eval_loss": 0.6499821018427611
},
{
"step": 3100,
"epoch": 11,
"train_loss": 1.2313616275787354,
"eval_loss": 0.6424531193450094
},
{
"step": 3150,
"epoch": 11,
"train_loss": 0.37773647904396057,
"eval_loss": 0.38214228078722956
},
{
"step": 3200,
"epoch": 11,
"train_loss": 1.0256904363632202,
"eval_loss": 0.344718436896801
},
{
"step": 3250,
"epoch": 11,
"train_loss": 0.8545166850090027,
"eval_loss": 0.37639920189976694
},
{
"step": 3300,
"epoch": 11,
"train_loss": 0.7513008117675781,
"eval_loss": 0.5736159734427929
},
{
"step": 3350,
"epoch": 11,
"train_loss": 1.3513377904891968,
"eval_loss": 0.7047768605872988
},
{
"step": 3400,
"epoch": 12,
"train_loss": 0.729595422744751,
"eval_loss": 0.22621392011642455
},
{
"step": 3450,
"epoch": 12,
"train_loss": 0.8922204971313477,
"eval_loss": 0.48001913614571096
},
{
"step": 3500,
"epoch": 12,
"train_loss": 0.3643261194229126,
"eval_loss": 0.27453359626233576
},
{
"step": 3550,
"epoch": 12,
"train_loss": 0.4188968539237976,
"eval_loss": 0.47001718934625386
},
{
"step": 3600,
"epoch": 12,
"train_loss": 0.5224657654762268,
"eval_loss": 0.3317598044872284
},
{
"step": 3650,
"epoch": 12,
"train_loss": 0.5869072675704956,
"eval_loss": 0.3092077659443021
},
{
"step": 3700,
"epoch": 13,
"train_loss": 0.6399657130241394,
"eval_loss": 0.44537014681845905
},
{
"step": 3750,
"epoch": 13,
"train_loss": 0.5388372540473938,
"eval_loss": 0.42834142986685037
},
{
"step": 3800,
"epoch": 13,
"train_loss": 0.31678545475006104,
"eval_loss": 0.37301751840859654
},
{
"step": 3850,
"epoch": 13,
"train_loss": 1.0250321626663208,
"eval_loss": 0.3215056784451008
},
{
"step": 3900,
"epoch": 13,
"train_loss": 0.7055963277816772,
"eval_loss": 0.44134364230558276
},
{
"step": 3950,
"epoch": 13,
"train_loss": 0.6139594316482544,
"eval_loss": 0.2871012590825558
},
{
"step": 4000,
"epoch": 13,
"train_loss": 0.7829692959785461,
"eval_loss": 0.32805103398859503
},
{
"step": 4050,
"epoch": 14,
"train_loss": 0.89658522605896,
"eval_loss": 0.20069403741508723
},
{
"step": 4100,
"epoch": 14,
"train_loss": 0.5302367210388184,
"eval_loss": 0.28595281541347506
},
{
"step": 4150,
"epoch": 14,
"train_loss": 0.26509907841682434,
"eval_loss": 0.41278840391896665
},
{
"step": 4200,
"epoch": 14,
"train_loss": 0.44153666496276855,
"eval_loss": 0.22166384682059287
},
{
"step": 4250,
"epoch": 14,
"train_loss": 0.7794387340545654,
"eval_loss": 0.3483078759163618
},
{
"step": 4300,
"epoch": 14,
"train_loss": 0.6767944097518921,
"eval_loss": 0.4360230155289173
},
{
"step": 4350,
"epoch": 15,
"train_loss": 0.5257160663604736,
"eval_loss": 0.3954934066627175
},
{
"step": 4400,
"epoch": 15,
"train_loss": 0.5993033051490784,
"eval_loss": 0.31953859254717826
},
{
"step": 4450,
"epoch": 15,
"train_loss": 0.8400174379348755,
"eval_loss": 0.28360011894255877
},
{
"step": 4500,
"epoch": 15,
"train_loss": 0.21801462769508362,
"eval_loss": 0.13924190681427717
},
{
"step": 4550,
"epoch": 15,
"train_loss": 0.7216958999633789,
"eval_loss": 0.38468425907194614
},
{
"step": 4600,
"epoch": 15,
"train_loss": 0.23532339930534363,
"eval_loss": 0.2397186360321939
},
{
"step": 4650,
"epoch": 16,
"train_loss": 0.40155768394470215,
"eval_loss": 0.27377752810716627
},
{
"step": 4700,
"epoch": 16,
"train_loss": 0.6509999632835388,
"eval_loss": 0.17066196743398904
},
{
"step": 4750,
"epoch": 16,
"train_loss": 0.16792532801628113,
"eval_loss": 0.25832435097545386
},
{
"step": 4800,
"epoch": 16,
"train_loss": 0.5321880578994751,
"eval_loss": 0.21553349401801825
},
{
"step": 4850,
"epoch": 16,
"train_loss": 0.8054535984992981,
"eval_loss": 0.3256093241274357
},
{
"step": 4900,
"epoch": 16,
"train_loss": 0.6085711717605591,
"eval_loss": 0.14158667558804156
},
{
"step": 4950,
"epoch": 17,
"train_loss": 0.7682583928108215,
"eval_loss": 0.19988978593610227
},
{
"step": 5000,
"epoch": 17,
"train_loss": 0.7190942764282227,
"eval_loss": 0.19306682515889406
},
{
"step": 5050,
"epoch": 17,
"train_loss": 0.6006054878234863,
"eval_loss": 0.19667899813503026
},
{
"step": 5100,
"epoch": 17,
"train_loss": 0.1798602044582367,
"eval_loss": 0.18447849657386542
},
{
"step": 5150,
"epoch": 17,
"train_loss": 0.201195627450943,
"eval_loss": 0.2557945422828197
},
{
"step": 5200,
"epoch": 17,
"train_loss": 0.3808324933052063,
"eval_loss": 0.2205318679101765
},
{
"step": 5250,
"epoch": 18,
"train_loss": 0.7183626890182495,
"eval_loss": 0.18480183414649218
},
{
"step": 5300,
"epoch": 18,
"train_loss": 0.15622234344482422,
"eval_loss": 0.21750007439404725
},
{
"step": 5350,
"epoch": 18,
"train_loss": 0.5174674987792969,
"eval_loss": 0.207568721473217
},
{
"step": 5400,
"epoch": 18,
"train_loss": 0.3536418676376343,
"eval_loss": 0.24083916349336504
},
{
"step": 5450,
"epoch": 18,
"train_loss": 0.6717914938926697,
"eval_loss": 0.21798975663259626
},
{
"step": 5500,
"epoch": 18,
"train_loss": 0.49078643321990967,
"eval_loss": 0.2800134336575866
},
{
"step": 5550,
"epoch": 19,
"train_loss": 0.5110766887664795,
"eval_loss": 0.19590738862752916
},
{
"step": 5600,
"epoch": 19,
"train_loss": 0.75737464427948,
"eval_loss": 0.31123267151415346
},
{
"step": 5650,
"epoch": 19,
"train_loss": 0.5591607093811035,
"eval_loss": 0.21578684272244572
},
{
"step": 5700,
"epoch": 19,
"train_loss": 0.49477618932724,
"eval_loss": 0.18046770989894867
},
{
"step": 5750,
"epoch": 19,
"train_loss": 0.1687086671590805,
"eval_loss": 0.12973198837134986
},
{
"step": 5800,
"epoch": 19,
"train_loss": 0.8483715057373047,
"eval_loss": 0.20042803240939974
},
{
"step": 5850,
"epoch": 19,
"train_loss": 0.4635620713233948,
"eval_loss": 0.17925645909272134
},
{
"step": 5900,
"epoch": 20,
"train_loss": 0.2212029993534088,
"eval_loss": 0.20806707707233726
},
{
"step": 5950,
"epoch": 20,
"train_loss": 0.9533796310424805,
"eval_loss": 0.14048943249508739
},
{
"step": 6000,
"epoch": 20,
"train_loss": 0.5811203718185425,
"eval_loss": 0.26958920201286674
},
{
"step": 6050,
"epoch": 20,
"train_loss": 0.34601131081581116,
"eval_loss": 0.20130174113437532
},
{
"step": 6100,
"epoch": 20,
"train_loss": 0.3536478877067566,
"eval_loss": 0.19252710570581258
},
{
"step": 6150,
"epoch": 20,
"train_loss": 0.19176742434501648,
"eval_loss": 0.24033887404948473
}
]