SirajRLX's picture
Add Devstral-14B CPT training run
a555835 verified
{"ts": "2025-12-22T12:54:44", "event": "eval", "step": 50, "epoch": 0.14585232452142205, "eval_loss": 0.7892261147499084, "eval_runtime": 973.2157, "eval_samples_per_second": 0.649, "eval_steps_per_second": 0.649, "perplexity": 2.2016919098966565}
{"ts": "2025-12-22T14:16:39", "event": "eval", "step": 100, "epoch": 0.2917046490428441, "eval_loss": 0.6552971005439758, "eval_runtime": 966.7072, "eval_samples_per_second": 0.654, "eval_steps_per_second": 0.654, "perplexity": 1.9257145632388668}
{"ts": "2025-12-22T15:50:59", "event": "eval", "step": 150, "epoch": 0.4375569735642662, "eval_loss": 0.5903874635696411, "eval_runtime": 1186.9542, "eval_samples_per_second": 0.532, "eval_steps_per_second": 0.532, "perplexity": 1.8046875306209718}
{"ts": "2025-12-22T17:31:15", "event": "eval", "step": 200, "epoch": 0.5834092980856882, "eval_loss": 0.5414339303970337, "eval_runtime": 1180.7894, "eval_samples_per_second": 0.535, "eval_steps_per_second": 0.535, "perplexity": 1.7184692616188395}
{"ts": "2025-12-22T19:11:10", "event": "eval", "step": 250, "epoch": 0.7292616226071102, "eval_loss": 0.5038471221923828, "eval_runtime": 1175.0375, "eval_samples_per_second": 0.538, "eval_steps_per_second": 0.538, "perplexity": 1.6550763193760132}
{"ts": "2025-12-22T20:51:33", "event": "eval", "step": 300, "epoch": 0.8751139471285324, "eval_loss": 0.4752846360206604, "eval_runtime": 1189.1666, "eval_samples_per_second": 0.531, "eval_steps_per_second": 0.531, "perplexity": 1.6084719613930785}
{"ts": "2025-12-22T22:31:42", "event": "eval", "step": 350, "epoch": 1.0204193254329992, "eval_loss": 0.44924086332321167, "eval_runtime": 1214.6648, "eval_samples_per_second": 0.52, "eval_steps_per_second": 0.52, "perplexity": 1.5671220739753133}
{"ts": "2025-12-23T00:12:28", "event": "eval", "step": 400, "epoch": 1.1662716499544212, "eval_loss": 0.4318464398384094, "eval_runtime": 1206.0306, "eval_samples_per_second": 0.524, "eval_steps_per_second": 0.524, "perplexity": 1.5400985992121177}
{"ts": "2025-12-23T01:52:27", "event": "eval", "step": 450, "epoch": 1.3121239744758433, "eval_loss": 0.4178673028945923, "eval_runtime": 1197.5534, "eval_samples_per_second": 0.528, "eval_steps_per_second": 0.528, "perplexity": 1.5187191313977852}
{"ts": "2025-12-23T03:32:49", "event": "eval", "step": 500, "epoch": 1.4579762989972653, "eval_loss": 0.40706494450569153, "eval_runtime": 1189.1593, "eval_samples_per_second": 0.531, "eval_steps_per_second": 0.531, "perplexity": 1.5024016752277602}
{"ts": "2025-12-23T04:52:38", "event": "eval", "step": 550, "epoch": 1.6038286235186874, "eval_loss": 0.40037089586257935, "eval_runtime": 893.7411, "eval_samples_per_second": 0.707, "eval_steps_per_second": 0.707, "perplexity": 1.4923781118724992}
{"ts": "2025-12-23T06:06:46", "event": "eval", "step": 600, "epoch": 1.7496809480401094, "eval_loss": 0.3965963125228882, "eval_runtime": 912.3102, "eval_samples_per_second": 0.693, "eval_steps_per_second": 0.693, "perplexity": 1.4867556242644535}
{"ts": "2025-12-23T07:20:44", "event": "eval", "step": 650, "epoch": 1.8955332725615315, "eval_loss": 0.3949255049228668, "eval_runtime": 903.6455, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.699, "perplexity": 1.484273615724821}
{"ts": "2025-12-23T08:18:17", "event": "eval", "step": 686, "epoch": 2.0, "eval_loss": 0.3965963125228882, "eval_runtime": 916.1187, "eval_samples_per_second": 0.69, "eval_steps_per_second": 0.69, "perplexity": 1.4867556242644535}