SirajRLX's picture
Upload folder using huggingface_hub
4d338a0 verified
{"ts": "2025-12-26T18:34:59", "event": "eval", "step": 100, "epoch": 0.04219409282700422, "eval_loss": 1.138856053352356, "eval_runtime": 859.7128, "eval_samples_per_second": 2.451, "eval_steps_per_second": 2.451, "perplexity": 3.1231935540832674}
{"ts": "2025-12-26T19:05:22", "event": "eval", "step": 200, "epoch": 0.08438818565400844, "eval_loss": 0.995743453502655, "eval_runtime": 846.8257, "eval_samples_per_second": 2.488, "eval_steps_per_second": 2.488, "perplexity": 2.7067359257317922}
{"ts": "2025-12-26T19:35:57", "event": "eval", "step": 300, "epoch": 0.12658227848101267, "eval_loss": 0.9517185688018799, "eval_runtime": 860.0287, "eval_samples_per_second": 2.45, "eval_steps_per_second": 2.45, "perplexity": 2.5901571998746475}
{"ts": "2025-12-26T20:06:52", "event": "eval", "step": 400, "epoch": 0.16877637130801687, "eval_loss": 0.9282881617546082, "eval_runtime": 869.6867, "eval_samples_per_second": 2.423, "eval_steps_per_second": 2.423, "perplexity": 2.5301742193066197}
{"ts": "2025-12-26T20:37:22", "event": "eval", "step": 500, "epoch": 0.2109704641350211, "eval_loss": 0.9080732464790344, "eval_runtime": 857.0753, "eval_samples_per_second": 2.458, "eval_steps_per_second": 2.458, "perplexity": 2.4795404646097325}
{"ts": "2025-12-26T21:07:55", "event": "eval", "step": 600, "epoch": 0.25316455696202533, "eval_loss": 0.8903881311416626, "eval_runtime": 845.9969, "eval_samples_per_second": 2.491, "eval_steps_per_second": 2.491, "perplexity": 2.4360749843862655}
{"ts": "2025-12-26T21:38:29", "event": "eval", "step": 700, "epoch": 0.29535864978902954, "eval_loss": 0.8730722069740295, "eval_runtime": 858.184, "eval_samples_per_second": 2.455, "eval_steps_per_second": 2.455, "perplexity": 2.3942552136153896}
{"ts": "2025-12-26T22:09:04", "event": "eval", "step": 800, "epoch": 0.33755274261603374, "eval_loss": 0.8635594248771667, "eval_runtime": 865.9348, "eval_samples_per_second": 2.433, "eval_steps_per_second": 2.433, "perplexity": 2.371587174483758}
{"ts": "2025-12-26T22:39:42", "event": "eval", "step": 900, "epoch": 0.379746835443038, "eval_loss": 0.8491304516792297, "eval_runtime": 852.6211, "eval_samples_per_second": 2.471, "eval_steps_per_second": 2.471, "perplexity": 2.3376133001985813}
{"ts": "2025-12-26T23:10:19", "event": "eval", "step": 1000, "epoch": 0.4219409282700422, "eval_loss": 0.8388314247131348, "eval_runtime": 847.4828, "eval_samples_per_second": 2.486, "eval_steps_per_second": 2.486, "perplexity": 2.3136617085393727}
{"ts": "2025-12-26T23:41:01", "event": "eval", "step": 1100, "epoch": 0.4641350210970464, "eval_loss": 0.8283821940422058, "eval_runtime": 861.0464, "eval_samples_per_second": 2.447, "eval_steps_per_second": 2.447, "perplexity": 2.2896115950724094}
{"ts": "2025-12-27T00:11:32", "event": "eval", "step": 1200, "epoch": 0.5063291139240507, "eval_loss": 0.8186545968055725, "eval_runtime": 862.1638, "eval_samples_per_second": 2.444, "eval_steps_per_second": 2.444, "perplexity": 2.267447153803737}
{"ts": "2025-12-27T00:42:14", "event": "eval", "step": 1300, "epoch": 0.5485232067510548, "eval_loss": 0.808323085308075, "eval_runtime": 853.577, "eval_samples_per_second": 2.468, "eval_steps_per_second": 2.468, "perplexity": 2.244141595588398}
{"ts": "2025-12-27T01:12:54", "event": "eval", "step": 1400, "epoch": 0.5907172995780591, "eval_loss": 0.8009664416313171, "eval_runtime": 851.9417, "eval_samples_per_second": 2.473, "eval_steps_per_second": 2.473, "perplexity": 2.227692823570967}
{"ts": "2025-12-27T01:43:40", "event": "eval", "step": 1500, "epoch": 0.6329113924050633, "eval_loss": 0.7896141409873962, "eval_runtime": 865.9069, "eval_samples_per_second": 2.433, "eval_steps_per_second": 2.433, "perplexity": 2.2025463898941693}
{"ts": "2025-12-27T02:14:07", "event": "eval", "step": 1600, "epoch": 0.6751054852320675, "eval_loss": 0.7836604714393616, "eval_runtime": 861.5352, "eval_samples_per_second": 2.446, "eval_steps_per_second": 2.446, "perplexity": 2.189472115099779}
{"ts": "2025-12-27T02:44:39", "event": "eval", "step": 1700, "epoch": 0.7172995780590717, "eval_loss": 0.7783148884773254, "eval_runtime": 846.1986, "eval_samples_per_second": 2.49, "eval_steps_per_second": 2.49, "perplexity": 2.1777993369634507}
{"ts": "2025-12-27T03:15:22", "event": "eval", "step": 1800, "epoch": 0.759493670886076, "eval_loss": 0.7719914317131042, "eval_runtime": 853.1943, "eval_samples_per_second": 2.47, "eval_steps_per_second": 2.47, "perplexity": 2.16407156624064}
{"ts": "2025-12-27T03:45:59", "event": "eval", "step": 1900, "epoch": 0.8016877637130801, "eval_loss": 0.7648926973342896, "eval_runtime": 865.9394, "eval_samples_per_second": 2.433, "eval_steps_per_second": 2.433, "perplexity": 2.148763794201393}
{"ts": "2025-12-27T04:16:30", "event": "eval", "step": 2000, "epoch": 0.8438818565400844, "eval_loss": 0.7587011456489563, "eval_runtime": 856.2276, "eval_samples_per_second": 2.461, "eval_steps_per_second": 2.461, "perplexity": 2.135500714003631}
{"ts": "2025-12-27T04:47:14", "event": "eval", "step": 2100, "epoch": 0.8860759493670886, "eval_loss": 0.7559094429016113, "eval_runtime": 847.8311, "eval_samples_per_second": 2.485, "eval_steps_per_second": 2.485, "perplexity": 2.1295473446786564}
{"ts": "2025-12-27T05:17:56", "event": "eval", "step": 2200, "epoch": 0.9282700421940928, "eval_loss": 0.7497645616531372, "eval_runtime": 856.8766, "eval_samples_per_second": 2.459, "eval_steps_per_second": 2.459, "perplexity": 2.116501652297792}
{"ts": "2025-12-27T05:48:33", "event": "eval", "step": 2300, "epoch": 0.9704641350210971, "eval_loss": 0.7464568614959717, "eval_runtime": 864.2128, "eval_samples_per_second": 2.438, "eval_steps_per_second": 2.438, "perplexity": 2.1095124648903094}
{"ts": "2025-12-27T06:18:53", "event": "eval", "step": 2400, "epoch": 1.0126582278481013, "eval_loss": 0.7421699166297913, "eval_runtime": 854.2185, "eval_samples_per_second": 2.467, "eval_steps_per_second": 2.467, "perplexity": 2.100488457789446}
{"ts": "2025-12-27T06:49:31", "event": "eval", "step": 2500, "epoch": 1.0548523206751055, "eval_loss": 0.741338849067688, "eval_runtime": 847.7478, "eval_samples_per_second": 2.485, "eval_steps_per_second": 2.485, "perplexity": 2.098743535142341}
{"ts": "2025-12-27T07:20:16", "event": "eval", "step": 2600, "epoch": 1.0970464135021096, "eval_loss": 0.7377332448959351, "eval_runtime": 859.6612, "eval_samples_per_second": 2.451, "eval_steps_per_second": 2.451, "perplexity": 2.091189922548451}
{"ts": "2025-12-27T07:51:03", "event": "eval", "step": 2700, "epoch": 1.139240506329114, "eval_loss": 0.7335711717605591, "eval_runtime": 861.9651, "eval_samples_per_second": 2.444, "eval_steps_per_second": 2.444, "perplexity": 2.0825043247357775}
{"ts": "2025-12-27T08:21:29", "event": "eval", "step": 2800, "epoch": 1.1814345991561181, "eval_loss": 0.7298192977905273, "eval_runtime": 849.544, "eval_samples_per_second": 2.48, "eval_steps_per_second": 2.48, "perplexity": 2.074705669900544}
{"ts": "2025-12-27T08:52:09", "event": "eval", "step": 2900, "epoch": 1.2236286919831223, "eval_loss": 0.7281573414802551, "eval_runtime": 854.563, "eval_samples_per_second": 2.466, "eval_steps_per_second": 2.466, "perplexity": 2.0712604634048333}
{"ts": "2025-12-27T09:23:05", "event": "eval", "step": 3000, "epoch": 1.2658227848101267, "eval_loss": 0.72515869140625, "eval_runtime": 868.0515, "eval_samples_per_second": 2.427, "eval_steps_per_second": 2.427, "perplexity": 2.0650587810476666}
{"ts": "2025-12-27T09:53:39", "event": "eval", "step": 3100, "epoch": 1.3080168776371308, "eval_loss": 0.7225774526596069, "eval_runtime": 862.4006, "eval_samples_per_second": 2.443, "eval_steps_per_second": 2.443, "perplexity": 2.0597352449225896}
{"ts": "2025-12-27T10:24:10", "event": "eval", "step": 3200, "epoch": 1.350210970464135, "eval_loss": 0.7200453281402588, "eval_runtime": 846.2953, "eval_samples_per_second": 2.49, "eval_steps_per_second": 2.49, "perplexity": 2.0545263363912047}
{"ts": "2025-12-27T10:54:40", "event": "eval", "step": 3300, "epoch": 1.3924050632911391, "eval_loss": 0.7173135876655579, "eval_runtime": 853.5344, "eval_samples_per_second": 2.469, "eval_steps_per_second": 2.469, "perplexity": 2.0489215625209867}
{"ts": "2025-12-27T11:25:25", "event": "eval", "step": 3400, "epoch": 1.4345991561181435, "eval_loss": 0.715917706489563, "eval_runtime": 868.51, "eval_samples_per_second": 2.426, "eval_steps_per_second": 2.426, "perplexity": 2.046063506698008}
{"ts": "2025-12-27T11:55:47", "event": "eval", "step": 3500, "epoch": 1.4767932489451476, "eval_loss": 0.7155047059059143, "eval_runtime": 855.8428, "eval_samples_per_second": 2.462, "eval_steps_per_second": 2.462, "perplexity": 2.0452186557495358}
{"ts": "2025-12-27T12:26:22", "event": "eval", "step": 3600, "epoch": 1.518987341772152, "eval_loss": 0.7118256688117981, "eval_runtime": 851.3079, "eval_samples_per_second": 2.475, "eval_steps_per_second": 2.475, "perplexity": 2.0377080448290807}
{"ts": "2025-12-27T12:57:01", "event": "eval", "step": 3700, "epoch": 1.5611814345991561, "eval_loss": 0.7099412679672241, "eval_runtime": 857.2273, "eval_samples_per_second": 2.458, "eval_steps_per_second": 2.458, "perplexity": 2.0338718017134907}
{"ts": "2025-12-27T13:27:39", "event": "eval", "step": 3800, "epoch": 1.6033755274261603, "eval_loss": 0.7080941200256348, "eval_runtime": 865.6774, "eval_samples_per_second": 2.434, "eval_steps_per_second": 2.434, "perplexity": 2.030118407206169}
{"ts": "2025-12-27T13:58:20", "event": "eval", "step": 3900, "epoch": 1.6455696202531644, "eval_loss": 0.7049403786659241, "eval_runtime": 854.9866, "eval_samples_per_second": 2.464, "eval_steps_per_second": 2.464, "perplexity": 2.023726024080043}
{"ts": "2025-12-27T14:28:59", "event": "eval", "step": 4000, "epoch": 1.6877637130801688, "eval_loss": 0.7027890682220459, "eval_runtime": 848.7529, "eval_samples_per_second": 2.482, "eval_steps_per_second": 2.482, "perplexity": 2.0193770408327394}
{"ts": "2025-12-27T14:59:26", "event": "eval", "step": 4100, "epoch": 1.729957805907173, "eval_loss": 0.7022181153297424, "eval_runtime": 844.6405, "eval_samples_per_second": 2.495, "eval_steps_per_second": 2.495, "perplexity": 2.0182244007535304}
{"ts": "2025-12-27T15:20:08", "event": "eval", "step": 4200, "epoch": 1.7721518987341773, "eval_loss": 0.6993561387062073, "eval_runtime": 542.0281, "eval_samples_per_second": 3.887, "eval_steps_per_second": 3.887, "perplexity": 2.012456547365305}
{"ts": "2025-12-27T15:39:13", "event": "eval", "step": 4300, "epoch": 1.8143459915611815, "eval_loss": 0.6981000900268555, "eval_runtime": 514.4659, "eval_samples_per_second": 4.096, "eval_steps_per_second": 4.096, "perplexity": 2.0099303907966624}
{"ts": "2025-12-27T15:58:13", "event": "eval", "step": 4400, "epoch": 1.8565400843881856, "eval_loss": 0.6961485147476196, "eval_runtime": 513.5724, "eval_samples_per_second": 4.103, "eval_steps_per_second": 4.103, "perplexity": 2.0060116854010337}
{"ts": "2025-12-27T16:17:15", "event": "eval", "step": 4500, "epoch": 1.8987341772151898, "eval_loss": 0.6938078999519348, "eval_runtime": 513.615, "eval_samples_per_second": 4.102, "eval_steps_per_second": 4.102, "perplexity": 2.0013218754302557}
{"ts": "2025-12-27T16:38:02", "event": "eval", "step": 4600, "epoch": 1.9409282700421941, "eval_loss": 0.6930755376815796, "eval_runtime": 617.8927, "eval_samples_per_second": 3.41, "eval_steps_per_second": 3.41, "perplexity": 1.999856719375848}
{"ts": "2025-12-27T16:58:16", "event": "eval", "step": 4700, "epoch": 1.9831223628691983, "eval_loss": 0.6923081278800964, "eval_runtime": 514.7729, "eval_samples_per_second": 4.093, "eval_steps_per_second": 4.093, "perplexity": 1.9983225984528428}
{"ts": "2025-12-27T17:17:24", "event": "eval", "step": 4800, "epoch": 2.0253164556962027, "eval_loss": 0.6924457550048828, "eval_runtime": 514.0427, "eval_samples_per_second": 4.099, "eval_steps_per_second": 4.099, "perplexity": 1.998597640772671}
{"ts": "2025-12-27T17:36:32", "event": "eval", "step": 4900, "epoch": 2.067510548523207, "eval_loss": 0.6941288113594055, "eval_runtime": 513.4497, "eval_samples_per_second": 4.104, "eval_steps_per_second": 4.104, "perplexity": 2.0019642255133236}
{"ts": "2025-12-27T17:58:22", "event": "eval", "step": 5000, "epoch": 2.109704641350211, "eval_loss": 0.6908889412879944, "eval_runtime": 675.8398, "eval_samples_per_second": 3.118, "eval_steps_per_second": 3.118, "perplexity": 1.9954886172641344}
{"ts": "2025-12-27T18:23:03", "event": "eval", "step": 5100, "epoch": 2.151898734177215, "eval_loss": 0.6902023553848267, "eval_runtime": 733.915, "eval_samples_per_second": 2.871, "eval_steps_per_second": 2.871, "perplexity": 1.9941190131388347}
{"ts": "2025-12-27T18:55:59", "event": "eval", "step": 5200, "epoch": 2.1940928270042193, "eval_loss": 0.6915348172187805, "eval_runtime": 1167.9782, "eval_samples_per_second": 1.804, "eval_steps_per_second": 1.804, "perplexity": 1.9967778716365487}
{"ts": "2025-12-27T19:32:22", "event": "eval", "step": 5300, "epoch": 2.2362869198312234, "eval_loss": 0.6898328065872192, "eval_runtime": 739.3794, "eval_samples_per_second": 2.85, "eval_steps_per_second": 2.85, "perplexity": 1.993382225003213}
{"ts": "2025-12-27T19:58:17", "event": "eval", "step": 5400, "epoch": 2.278481012658228, "eval_loss": 0.6875645518302917, "eval_runtime": 861.3558, "eval_samples_per_second": 2.446, "eval_steps_per_second": 2.446, "perplexity": 1.988865850369486}
{"ts": "2025-12-27T20:31:44", "event": "eval", "step": 5500, "epoch": 2.320675105485232, "eval_loss": 0.6867148876190186, "eval_runtime": 941.3545, "eval_samples_per_second": 2.238, "eval_steps_per_second": 2.238, "perplexity": 1.9871766999423568}
{"ts": "2025-12-27T21:05:14", "event": "eval", "step": 5600, "epoch": 2.3628691983122363, "eval_loss": 0.6851074695587158, "eval_runtime": 938.5536, "eval_samples_per_second": 2.245, "eval_steps_per_second": 2.245, "perplexity": 1.9839850420773193}
{"ts": "2025-12-27T21:38:52", "event": "eval", "step": 5700, "epoch": 2.4050632911392404, "eval_loss": 0.6841402053833008, "eval_runtime": 941.6641, "eval_samples_per_second": 2.238, "eval_steps_per_second": 2.238, "perplexity": 1.9820669322305768}
{"ts": "2025-12-27T22:09:41", "event": "eval", "step": 5800, "epoch": 2.4472573839662446, "eval_loss": 0.6835155487060547, "eval_runtime": 758.407, "eval_samples_per_second": 2.778, "eval_steps_per_second": 2.778, "perplexity": 1.9808292075033642}
{"ts": "2025-12-27T22:28:42", "event": "eval", "step": 5900, "epoch": 2.489451476793249, "eval_loss": 0.6820966005325317, "eval_runtime": 513.3515, "eval_samples_per_second": 4.104, "eval_steps_per_second": 4.104, "perplexity": 1.9780205066890182}
{"ts": "2025-12-27T22:47:43", "event": "eval", "step": 6000, "epoch": 2.5316455696202533, "eval_loss": 0.6813357472419739, "eval_runtime": 513.5491, "eval_samples_per_second": 4.103, "eval_steps_per_second": 4.103, "perplexity": 1.9765160956683256}
{"ts": "2025-12-27T23:06:47", "event": "eval", "step": 6100, "epoch": 2.5738396624472575, "eval_loss": 0.6812278628349304, "eval_runtime": 513.4749, "eval_samples_per_second": 4.103, "eval_steps_per_second": 4.103, "perplexity": 1.9763028719032991}
{"ts": "2025-12-27T23:25:56", "event": "eval", "step": 6200, "epoch": 2.6160337552742616, "eval_loss": 0.6795271039009094, "eval_runtime": 513.2393, "eval_samples_per_second": 4.105, "eval_steps_per_second": 4.105, "perplexity": 1.972944513825857}
{"ts": "2025-12-27T23:44:50", "event": "eval", "step": 6300, "epoch": 2.6582278481012658, "eval_loss": 0.6781066656112671, "eval_runtime": 512.3669, "eval_samples_per_second": 4.112, "eval_steps_per_second": 4.112, "perplexity": 1.9701440573037758}
{"ts": "2025-12-28T00:03:48", "event": "eval", "step": 6400, "epoch": 2.70042194092827, "eval_loss": 0.6764505505561829, "eval_runtime": 512.7682, "eval_samples_per_second": 4.109, "eval_steps_per_second": 4.109, "perplexity": 1.9668839723527984}
{"ts": "2025-12-28T00:22:46", "event": "eval", "step": 6500, "epoch": 2.742616033755274, "eval_loss": 0.6768895387649536, "eval_runtime": 513.0657, "eval_samples_per_second": 4.107, "eval_steps_per_second": 4.107, "perplexity": 1.9677476007721588}
{"ts": "2025-12-28T00:41:51", "event": "eval", "step": 6600, "epoch": 2.7848101265822782, "eval_loss": 0.6737648844718933, "eval_runtime": 512.921, "eval_samples_per_second": 4.108, "eval_steps_per_second": 4.108, "perplexity": 1.9616086658032716}
{"ts": "2025-12-28T01:00:52", "event": "eval", "step": 6700, "epoch": 2.827004219409283, "eval_loss": 0.6737436056137085, "eval_runtime": 513.2559, "eval_samples_per_second": 4.105, "eval_steps_per_second": 4.105, "perplexity": 1.961566925454753}
{"ts": "2025-12-28T01:19:55", "event": "eval", "step": 6800, "epoch": 2.869198312236287, "eval_loss": 0.6721681356430054, "eval_runtime": 513.1285, "eval_samples_per_second": 4.106, "eval_steps_per_second": 4.106, "perplexity": 1.9584789687983855}
{"ts": "2025-12-28T01:38:47", "event": "eval", "step": 6900, "epoch": 2.911392405063291, "eval_loss": 0.6713213920593262, "eval_runtime": 513.1265, "eval_samples_per_second": 4.106, "eval_steps_per_second": 4.106, "perplexity": 1.9568213411895954}
{"ts": "2025-12-28T01:57:47", "event": "eval", "step": 7000, "epoch": 2.9535864978902953, "eval_loss": 0.6706293225288391, "eval_runtime": 513.4396, "eval_samples_per_second": 4.104, "eval_steps_per_second": 4.104, "perplexity": 1.955467553274469}
{"ts": "2025-12-28T02:16:49", "event": "eval", "step": 7100, "epoch": 2.9957805907173, "eval_loss": 0.6692973375320435, "eval_runtime": 512.8985, "eval_samples_per_second": 4.108, "eval_steps_per_second": 4.108, "perplexity": 1.9528646337415076}
{"ts": "2025-12-28T02:35:50", "event": "eval", "step": 7200, "epoch": 3.037974683544304, "eval_loss": 0.6751418709754944, "eval_runtime": 513.8972, "eval_samples_per_second": 4.1, "eval_steps_per_second": 4.1, "perplexity": 1.9643116350103986}
{"ts": "2025-12-28T02:54:46", "event": "eval", "step": 7300, "epoch": 3.080168776371308, "eval_loss": 0.678839385509491, "eval_runtime": 513.7013, "eval_samples_per_second": 4.102, "eval_steps_per_second": 4.102, "perplexity": 1.9715881500500663}
{"ts": "2025-12-28T03:13:51", "event": "eval", "step": 7400, "epoch": 3.1223628691983123, "eval_loss": 0.676459550857544, "eval_runtime": 513.5901, "eval_samples_per_second": 4.102, "eval_steps_per_second": 4.102, "perplexity": 1.9669016749809562}
{"ts": "2025-12-28T03:32:54", "event": "eval", "step": 7500, "epoch": 3.1645569620253164, "eval_loss": 0.6774632334709167, "eval_runtime": 513.4064, "eval_samples_per_second": 4.104, "eval_steps_per_second": 4.104, "perplexity": 1.9688768110333967}
{"ts": "2025-12-28T03:51:52", "event": "eval", "step": 7600, "epoch": 3.2067510548523206, "eval_loss": 0.6755207777023315, "eval_runtime": 513.9779, "eval_samples_per_second": 4.099, "eval_steps_per_second": 4.099, "perplexity": 1.965056066928733}
{"ts": "2025-12-28T04:00:24", "event": "eval", "step": 7600, "epoch": 3.2067510548523206, "eval_loss": 0.6706293225288391, "eval_runtime": 511.6513, "eval_samples_per_second": 4.118, "eval_steps_per_second": 4.118, "perplexity": 1.955467553274469}