SirajRLX's picture
Add Devstral-24B SFT training run
89ec0cd verified
{"ts": "2025-12-23T14:16:36", "event": "eval", "step": 100, "epoch": 0.043149946062567425, "eval_loss": 0.9360476136207581, "eval_runtime": 834.9, "eval_samples_per_second": 2.467, "eval_steps_per_second": 2.467, "perplexity": 2.5498833515166743}
{"ts": "2025-12-23T14:51:03", "event": "eval", "step": 100, "epoch": 0.043149946062567425, "eval_loss": 1.128923773765564, "eval_runtime": 825.0166, "eval_samples_per_second": 2.497, "eval_steps_per_second": 2.497, "perplexity": 3.0923266655355475}
{"ts": "2025-12-23T15:20:59", "event": "eval", "step": 200, "epoch": 0.08629989212513485, "eval_loss": 0.981117308139801, "eval_runtime": 826.4469, "eval_samples_per_second": 2.493, "eval_steps_per_second": 2.493, "perplexity": 2.6674349243198323}
{"ts": "2025-12-23T15:50:45", "event": "eval", "step": 300, "epoch": 0.12944983818770225, "eval_loss": 0.9384957551956177, "eval_runtime": 835.4727, "eval_samples_per_second": 2.466, "eval_steps_per_second": 2.466, "perplexity": 2.556133474431821}
{"ts": "2025-12-23T16:20:43", "event": "eval", "step": 400, "epoch": 0.1725997842502697, "eval_loss": 0.9124138355255127, "eval_runtime": 832.6331, "eval_samples_per_second": 2.474, "eval_steps_per_second": 2.474, "perplexity": 2.490326522778942}
{"ts": "2025-12-23T16:50:41", "event": "eval", "step": 500, "epoch": 0.21574973031283712, "eval_loss": 0.8945327997207642, "eval_runtime": 822.9776, "eval_samples_per_second": 2.503, "eval_steps_per_second": 2.503, "perplexity": 2.4461926605539563}
{"ts": "2025-12-23T17:20:42", "event": "eval", "step": 600, "epoch": 0.2588996763754045, "eval_loss": 0.8770867586135864, "eval_runtime": 827.8893, "eval_samples_per_second": 2.488, "eval_steps_per_second": 2.488, "perplexity": 2.4038863943096764}
{"ts": "2025-12-23T17:50:37", "event": "eval", "step": 700, "epoch": 0.30204962243797195, "eval_loss": 0.8631294369697571, "eval_runtime": 839.6269, "eval_samples_per_second": 2.453, "eval_steps_per_second": 2.453, "perplexity": 2.3705676398868443}
{"ts": "2025-12-23T18:20:39", "event": "eval", "step": 800, "epoch": 0.3451995685005394, "eval_loss": 0.8476243615150452, "eval_runtime": 831.6735, "eval_samples_per_second": 2.477, "eval_steps_per_second": 2.477, "perplexity": 2.3340952936819845}
{"ts": "2025-12-23T18:50:33", "event": "eval", "step": 900, "epoch": 0.3883495145631068, "eval_loss": 0.8351938128471375, "eval_runtime": 823.5649, "eval_samples_per_second": 2.501, "eval_steps_per_second": 2.501, "perplexity": 2.305260794150979}
{"ts": "2025-12-23T19:20:44", "event": "eval", "step": 1000, "epoch": 0.43149946062567424, "eval_loss": 0.8266019821166992, "eval_runtime": 835.0044, "eval_samples_per_second": 2.467, "eval_steps_per_second": 2.467, "perplexity": 2.2855392271205766}
{"ts": "2025-12-23T19:50:37", "event": "eval", "step": 1100, "epoch": 0.4746494066882416, "eval_loss": 0.8152187466621399, "eval_runtime": 835.0394, "eval_samples_per_second": 2.467, "eval_steps_per_second": 2.467, "perplexity": 2.2596699135420506}
{"ts": "2025-12-23T20:20:32", "event": "eval", "step": 1200, "epoch": 0.517799352750809, "eval_loss": 0.8070361018180847, "eval_runtime": 823.9296, "eval_samples_per_second": 2.5, "eval_steps_per_second": 2.5, "perplexity": 2.24125528012424}
{"ts": "2025-12-23T20:50:45", "event": "eval", "step": 1300, "epoch": 0.5609492988133765, "eval_loss": 0.7962777018547058, "eval_runtime": 827.8798, "eval_samples_per_second": 2.488, "eval_steps_per_second": 2.488, "perplexity": 2.2172722005047705}
{"ts": "2025-12-23T21:20:37", "event": "eval", "step": 1400, "epoch": 0.6040992448759439, "eval_loss": 0.7866972088813782, "eval_runtime": 836.0759, "eval_samples_per_second": 2.464, "eval_steps_per_second": 2.464, "perplexity": 2.1961310726855836}
{"ts": "2025-12-23T21:50:35", "event": "eval", "step": 1500, "epoch": 0.6472491909385113, "eval_loss": 0.778505802154541, "eval_runtime": 836.2317, "eval_samples_per_second": 2.463, "eval_steps_per_second": 2.463, "perplexity": 2.1782151483338836}
{"ts": "2025-12-23T22:20:28", "event": "eval", "step": 1600, "epoch": 0.6903991370010788, "eval_loss": 0.7706249952316284, "eval_runtime": 821.4894, "eval_samples_per_second": 2.508, "eval_steps_per_second": 2.508, "perplexity": 2.1611165193057653}
{"ts": "2025-12-23T22:50:30", "event": "eval", "step": 1700, "epoch": 0.7335490830636462, "eval_loss": 0.7649155259132385, "eval_runtime": 828.3542, "eval_samples_per_second": 2.487, "eval_steps_per_second": 2.487, "perplexity": 2.1488128479852233}
{"ts": "2025-12-23T23:20:32", "event": "eval", "step": 1800, "epoch": 0.7766990291262136, "eval_loss": 0.7598537802696228, "eval_runtime": 838.9645, "eval_samples_per_second": 2.455, "eval_steps_per_second": 2.455, "perplexity": 2.1379635851816685}
{"ts": "2025-12-23T23:50:29", "event": "eval", "step": 1900, "epoch": 0.819848975188781, "eval_loss": 0.7533769607543945, "eval_runtime": 830.2894, "eval_samples_per_second": 2.481, "eval_steps_per_second": 2.481, "perplexity": 2.124161127175132}
{"ts": "2025-12-24T00:20:24", "event": "eval", "step": 2000, "epoch": 0.8629989212513485, "eval_loss": 0.7487027645111084, "eval_runtime": 825.141, "eval_samples_per_second": 2.497, "eval_steps_per_second": 2.497, "perplexity": 2.114255549556043}
{"ts": "2025-12-24T00:50:25", "event": "eval", "step": 2100, "epoch": 0.9061488673139159, "eval_loss": 0.7449111342430115, "eval_runtime": 832.5135, "eval_samples_per_second": 2.474, "eval_steps_per_second": 2.474, "perplexity": 2.1062542527852224}
{"ts": "2025-12-24T01:20:20", "event": "eval", "step": 2200, "epoch": 0.9492988133764833, "eval_loss": 0.7400083541870117, "eval_runtime": 838.7633, "eval_samples_per_second": 2.456, "eval_steps_per_second": 2.456, "perplexity": 2.0959530244047575}
{"ts": "2025-12-24T01:50:21", "event": "eval", "step": 2300, "epoch": 0.9924487594390508, "eval_loss": 0.7360134720802307, "eval_runtime": 825.8559, "eval_samples_per_second": 2.494, "eval_steps_per_second": 2.494, "perplexity": 2.087596641666197}
{"ts": "2025-12-24T02:20:14", "event": "eval", "step": 2400, "epoch": 1.0353829557713052, "eval_loss": 0.7346783876419067, "eval_runtime": 825.8356, "eval_samples_per_second": 2.494, "eval_steps_per_second": 2.494, "perplexity": 2.0848113835674984}
{"ts": "2025-12-24T02:50:23", "event": "eval", "step": 2500, "epoch": 1.0785329018338727, "eval_loss": 0.7294915914535522, "eval_runtime": 836.4643, "eval_samples_per_second": 2.463, "eval_steps_per_second": 2.463, "perplexity": 2.07402588709581}
{"ts": "2025-12-24T03:20:08", "event": "eval", "step": 2600, "epoch": 1.12168284789644, "eval_loss": 0.7287447452545166, "eval_runtime": 831.8138, "eval_samples_per_second": 2.477, "eval_steps_per_second": 2.477, "perplexity": 2.0724774870256564}
{"ts": "2025-12-24T03:49:59", "event": "eval", "step": 2700, "epoch": 1.1648327939590075, "eval_loss": 0.72404944896698, "eval_runtime": 824.2877, "eval_samples_per_second": 2.499, "eval_steps_per_second": 2.499, "perplexity": 2.0627694001820904}
{"ts": "2025-12-24T04:20:03", "event": "eval", "step": 2800, "epoch": 1.207982740021575, "eval_loss": 0.7220398187637329, "eval_runtime": 831.5812, "eval_samples_per_second": 2.477, "eval_steps_per_second": 2.477, "perplexity": 2.0586281590685003}
{"ts": "2025-12-24T04:50:12", "event": "eval", "step": 2900, "epoch": 1.2511326860841425, "eval_loss": 0.7182289958000183, "eval_runtime": 840.8156, "eval_samples_per_second": 2.45, "eval_steps_per_second": 2.45, "perplexity": 2.0507980207177035}
{"ts": "2025-12-24T05:20:02", "event": "eval", "step": 3000, "epoch": 1.2942826321467098, "eval_loss": 0.7160727977752686, "eval_runtime": 830.8186, "eval_samples_per_second": 2.479, "eval_steps_per_second": 2.479, "perplexity": 2.046380857926466}
{"ts": "2025-12-24T05:49:53", "event": "eval", "step": 3100, "epoch": 1.3374325782092773, "eval_loss": 0.7127581834793091, "eval_runtime": 821.1346, "eval_samples_per_second": 2.509, "eval_steps_per_second": 2.509, "perplexity": 2.0396091237232032}
{"ts": "2025-12-24T06:19:57", "event": "eval", "step": 3200, "epoch": 1.3805825242718446, "eval_loss": 0.7103806734085083, "eval_runtime": 831.4047, "eval_samples_per_second": 2.478, "eval_steps_per_second": 2.478, "perplexity": 2.034765692425877}
{"ts": "2025-12-24T06:49:54", "event": "eval", "step": 3300, "epoch": 1.423732470334412, "eval_loss": 0.7086145281791687, "eval_runtime": 838.887, "eval_samples_per_second": 2.456, "eval_steps_per_second": 2.456, "perplexity": 2.0311751723286617}
{"ts": "2025-12-24T07:19:48", "event": "eval", "step": 3400, "epoch": 1.4668824163969796, "eval_loss": 0.7067069411277771, "eval_runtime": 829.9411, "eval_samples_per_second": 2.482, "eval_steps_per_second": 2.482, "perplexity": 2.027304222131885}
{"ts": "2025-12-24T07:49:43", "event": "eval", "step": 3500, "epoch": 1.510032362459547, "eval_loss": 0.7047909498214722, "eval_runtime": 821.8811, "eval_samples_per_second": 2.506, "eval_steps_per_second": 2.506, "perplexity": 2.023423643631521}
{"ts": "2025-12-24T08:19:44", "event": "eval", "step": 3600, "epoch": 1.5531823085221144, "eval_loss": 0.701831042766571, "eval_runtime": 836.0199, "eval_samples_per_second": 2.464, "eval_steps_per_second": 2.464, "perplexity": 2.0174433526325766}
{"ts": "2025-12-24T08:49:46", "event": "eval", "step": 3700, "epoch": 1.5963322545846816, "eval_loss": 0.6999035477638245, "eval_runtime": 836.4944, "eval_samples_per_second": 2.463, "eval_steps_per_second": 2.463, "perplexity": 2.01355848588544}
{"ts": "2025-12-24T09:19:47", "event": "eval", "step": 3800, "epoch": 1.6394822006472491, "eval_loss": 0.6974969506263733, "eval_runtime": 823.7017, "eval_samples_per_second": 2.501, "eval_steps_per_second": 2.501, "perplexity": 2.008718488095619}
{"ts": "2025-12-24T09:49:37", "event": "eval", "step": 3900, "epoch": 1.6826321467098166, "eval_loss": 0.6949095726013184, "eval_runtime": 817.4497, "eval_samples_per_second": 2.52, "eval_steps_per_second": 2.52, "perplexity": 2.003527891933936}
{"ts": "2025-12-24T10:10:24", "event": "eval", "step": 4000, "epoch": 1.7257820927723841, "eval_loss": 0.6941319704055786, "eval_runtime": 498.1423, "eval_samples_per_second": 4.135, "eval_steps_per_second": 4.135, "perplexity": 2.0019705498207383}
{"ts": "2025-12-24T10:29:00", "event": "eval", "step": 4100, "epoch": 1.7689320388349514, "eval_loss": 0.6929343938827515, "eval_runtime": 497.805, "eval_samples_per_second": 4.138, "eval_steps_per_second": 4.138, "perplexity": 1.999574471920571}
{"ts": "2025-12-24T10:47:38", "event": "eval", "step": 4200, "epoch": 1.812081984897519, "eval_loss": 0.6918642520904541, "eval_runtime": 498.4981, "eval_samples_per_second": 4.132, "eval_steps_per_second": 4.132, "perplexity": 1.9974357882628415}
{"ts": "2025-12-24T11:06:16", "event": "eval", "step": 4300, "epoch": 1.8552319309600862, "eval_loss": 0.6890321373939514, "eval_runtime": 498.5289, "eval_samples_per_second": 4.132, "eval_steps_per_second": 4.132, "perplexity": 1.9917868240446748}
{"ts": "2025-12-24T11:24:55", "event": "eval", "step": 4400, "epoch": 1.8983818770226537, "eval_loss": 0.687366783618927, "eval_runtime": 497.5319, "eval_samples_per_second": 4.14, "eval_steps_per_second": 4.14, "perplexity": 1.9884725548195745}
{"ts": "2025-12-24T11:43:29", "event": "eval", "step": 4500, "epoch": 1.9415318230852212, "eval_loss": 0.6851214170455933, "eval_runtime": 497.1141, "eval_samples_per_second": 4.144, "eval_steps_per_second": 4.144, "perplexity": 1.9840127138756343}
{"ts": "2025-12-24T12:02:11", "event": "eval", "step": 4600, "epoch": 1.9846817691477887, "eval_loss": 0.6843361258506775, "eval_runtime": 498.3201, "eval_samples_per_second": 4.134, "eval_steps_per_second": 4.134, "perplexity": 1.9824552977534464}
{"ts": "2025-12-24T12:20:51", "event": "eval", "step": 4700, "epoch": 2.027615965480043, "eval_loss": 0.6861101984977722, "eval_runtime": 498.0682, "eval_samples_per_second": 4.136, "eval_steps_per_second": 4.136, "perplexity": 1.98597543904121}
{"ts": "2025-12-24T12:39:30", "event": "eval", "step": 4800, "epoch": 2.0707659115426105, "eval_loss": 0.6874316930770874, "eval_runtime": 497.7977, "eval_samples_per_second": 4.138, "eval_steps_per_second": 4.138, "perplexity": 1.988601629684719}
{"ts": "2025-12-24T12:47:48", "event": "eval", "step": 4800, "epoch": 2.0707659115426105, "eval_loss": 0.6851214170455933, "eval_runtime": 497.0682, "eval_samples_per_second": 4.144, "eval_steps_per_second": 4.144, "perplexity": 1.9840127138756343}