{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 200.0, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2241.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2241.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.214554786682129, "train/diffusion_loss": 0.4518924653530121 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2156.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2156.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.158607482910156, "train/diffusion_loss": 0.4541515111923218 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1690.0, "debug/num_lat_total": 2009.0, "debug/num_tok_loss": 1690.0, "debug/num_tok_total": 2009.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.157901763916016, "train/diffusion_loss": 0.4803432524204254 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2238.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2238.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.17225170135498, "train/diffusion_loss": 0.4446203410625458 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1734.0, "debug/num_lat_total": 2185.0, "debug/num_tok_loss": 1734.0, "debug/num_tok_total": 2185.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.16765022277832, "train/diffusion_loss": 0.45456695556640625 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1681.0, "debug/num_lat_total": 2020.0, "debug/num_tok_loss": 1681.0, "debug/num_tok_total": 2020.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.191916465759277, "train/diffusion_loss": 0.45108145475387573 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1787.0, "debug/num_lat_total": 2334.0, "debug/num_tok_loss": 1787.0, "debug/num_tok_total": 2334.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.1714506149292, "train/diffusion_loss": 0.4591076970100403 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2334.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2334.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.179821014404297, "train/diffusion_loss": 0.40789151191711426 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1796.0, "debug/num_lat_total": 2434.0, "debug/num_tok_loss": 1796.0, "debug/num_tok_total": 2434.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.161667823791504, "train/diffusion_loss": 0.4303126931190491 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1696.0, "debug/num_lat_total": 2107.0, "debug/num_tok_loss": 1696.0, "debug/num_tok_total": 2107.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.184703826904297, "train/diffusion_loss": 0.4528208374977112 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2139.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2139.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.163565635681152, "train/diffusion_loss": 0.46860605478286743 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1779.0, "debug/num_lat_total": 2193.0, "debug/num_tok_loss": 1779.0, "debug/num_tok_total": 2193.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.173012733459473, "train/diffusion_loss": 0.4421927332878113 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2251.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2251.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.135828018188477, "train/diffusion_loss": 0.45780879259109497 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2298.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2298.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.20445728302002, "train/diffusion_loss": 0.4124974012374878 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1808.0, "debug/num_lat_total": 2252.0, "debug/num_tok_loss": 1808.0, "debug/num_tok_total": 2252.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.17200756072998, "train/diffusion_loss": 0.45158717036247253 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1880.0, "debug/num_lat_total": 2461.0, "debug/num_tok_loss": 1880.0, "debug/num_tok_total": 2461.0, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "train/ce_loss": 12.16403865814209, "train/diffusion_loss": 0.43170061707496643 }, { "epoch": 0, "step": 0, "train/learning_rate_real": 0.0 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2182.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2182.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.182713508605957, "train/diffusion_loss": 0.45220285654067993 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1716.0, "debug/num_lat_total": 2258.0, "debug/num_tok_loss": 1716.0, "debug/num_tok_total": 2258.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.187424659729004, "train/diffusion_loss": 0.4251019060611725 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1710.0, "debug/num_lat_total": 2096.0, "debug/num_tok_loss": 1710.0, "debug/num_tok_total": 2096.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.186864852905273, "train/diffusion_loss": 0.47552168369293213 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1742.0, "debug/num_lat_total": 2151.0, "debug/num_tok_loss": 1742.0, "debug/num_tok_total": 2151.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.177823066711426, "train/diffusion_loss": 0.47840219736099243 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1786.0, "debug/num_lat_total": 2338.0, "debug/num_tok_loss": 1786.0, "debug/num_tok_total": 2338.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.171921730041504, "train/diffusion_loss": 0.43838560581207275 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1778.0, "debug/num_lat_total": 2235.0, "debug/num_tok_loss": 1778.0, "debug/num_tok_total": 2235.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.19234561920166, "train/diffusion_loss": 0.4630849361419678 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2157.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2157.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.180907249450684, "train/diffusion_loss": 0.4821509122848511 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1655.0, "debug/num_lat_total": 2200.0, "debug/num_tok_loss": 1655.0, "debug/num_tok_total": 2200.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.166112899780273, "train/diffusion_loss": 0.4305018186569214 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1735.0, "debug/num_lat_total": 2071.0, "debug/num_tok_loss": 1735.0, "debug/num_tok_total": 2071.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.178923606872559, "train/diffusion_loss": 0.48272061347961426 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1682.0, "debug/num_lat_total": 2056.0, "debug/num_tok_loss": 1682.0, "debug/num_tok_total": 2056.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.184454917907715, "train/diffusion_loss": 0.48283350467681885 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1805.0, "debug/num_lat_total": 2212.0, "debug/num_tok_loss": 1805.0, "debug/num_tok_total": 2212.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.114720344543457, "train/diffusion_loss": 0.48657673597335815 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1695.0, "debug/num_lat_total": 2086.0, "debug/num_tok_loss": 1695.0, "debug/num_tok_total": 2086.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.194059371948242, "train/diffusion_loss": 0.4509486258029938 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1706.0, "debug/num_lat_total": 2081.0, "debug/num_tok_loss": 1706.0, "debug/num_tok_total": 2081.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.177267074584961, "train/diffusion_loss": 0.46584227681159973 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1719.0, "debug/num_lat_total": 2106.0, "debug/num_tok_loss": 1719.0, "debug/num_tok_total": 2106.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.15927791595459, "train/diffusion_loss": 0.4715038239955902 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1726.0, "debug/num_lat_total": 2112.0, "debug/num_tok_loss": 1726.0, "debug/num_tok_total": 2112.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.164405822753906, "train/diffusion_loss": 0.48125794529914856 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1695.0, "debug/num_lat_total": 2287.0, "debug/num_tok_loss": 1695.0, "debug/num_tok_total": 2287.0, "epoch": 0.14953271028037382, "step": 1 }, { "epoch": 0.14953271028037382, "step": 1, "train/ce_loss": 12.164380073547363, "train/diffusion_loss": 0.43863794207572937 }, { "epoch": 0.14953271028037382, "step": 1, "train/learning_rate_real": 1.25e-05 }, { "debug/num_lat_loss": 1780.0, "debug/num_lat_total": 2213.0, "debug/num_tok_loss": 1780.0, "debug/num_tok_total": 2213.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.215859413146973, "train/diffusion_loss": 0.458553284406662 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1716.0, "debug/num_lat_total": 2125.0, "debug/num_tok_loss": 1716.0, "debug/num_tok_total": 2125.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.206511497497559, "train/diffusion_loss": 0.4523429274559021 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2352.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2352.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.186681747436523, "train/diffusion_loss": 0.4239839017391205 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1761.0, "debug/num_lat_total": 2317.0, "debug/num_tok_loss": 1761.0, "debug/num_tok_total": 2317.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.174463272094727, "train/diffusion_loss": 0.41487666964530945 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1730.0, "debug/num_lat_total": 2167.0, "debug/num_tok_loss": 1730.0, "debug/num_tok_total": 2167.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.153246879577637, "train/diffusion_loss": 0.46722397208213806 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2227.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2227.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.171008110046387, "train/diffusion_loss": 0.44303643703460693 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1838.0, "debug/num_lat_total": 2150.0, "debug/num_tok_loss": 1838.0, "debug/num_tok_total": 2150.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.203962326049805, "train/diffusion_loss": 0.4903823733329773 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2276.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2276.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.200774192810059, "train/diffusion_loss": 0.4448697865009308 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2379.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2379.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.157685279846191, "train/diffusion_loss": 0.40528759360313416 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1707.0, "debug/num_lat_total": 2101.0, "debug/num_tok_loss": 1707.0, "debug/num_tok_total": 2101.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.178916931152344, "train/diffusion_loss": 0.4575231075286865 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1670.0, "debug/num_lat_total": 2013.0, "debug/num_tok_loss": 1670.0, "debug/num_tok_total": 2013.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.195891380310059, "train/diffusion_loss": 0.44977766275405884 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1706.0, "debug/num_lat_total": 2293.0, "debug/num_tok_loss": 1706.0, "debug/num_tok_total": 2293.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.192096710205078, "train/diffusion_loss": 0.41044771671295166 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2400.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2400.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.169843673706055, "train/diffusion_loss": 0.40407317876815796 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1656.0, "debug/num_lat_total": 2038.0, "debug/num_tok_loss": 1656.0, "debug/num_tok_total": 2038.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.17802619934082, "train/diffusion_loss": 0.470550000667572 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2040.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2040.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.206125259399414, "train/diffusion_loss": 0.4850577116012573 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1800.0, "debug/num_lat_total": 2091.0, "debug/num_tok_loss": 1800.0, "debug/num_tok_total": 2091.0, "epoch": 0.29906542056074764, "step": 2 }, { "epoch": 0.29906542056074764, "step": 2, "train/ce_loss": 12.160561561584473, "train/diffusion_loss": 0.490349680185318 }, { "epoch": 0.29906542056074764, "step": 2, "train/learning_rate_real": 2.5e-05 }, { "debug/num_lat_loss": 1708.0, "debug/num_lat_total": 2022.0, "debug/num_tok_loss": 1708.0, "debug/num_tok_total": 2022.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.188081741333008, "train/diffusion_loss": 0.47481265664100647 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2267.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2267.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.177225112915039, "train/diffusion_loss": 0.44798392057418823 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1796.0, "debug/num_lat_total": 2222.0, "debug/num_tok_loss": 1796.0, "debug/num_tok_total": 2222.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.23061752319336, "train/diffusion_loss": 0.46705523133277893 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2040.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2040.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.185416221618652, "train/diffusion_loss": 0.4840916395187378 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1766.0, "debug/num_lat_total": 2299.0, "debug/num_tok_loss": 1766.0, "debug/num_tok_total": 2299.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.19878101348877, "train/diffusion_loss": 0.4340754747390747 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1676.0, "debug/num_lat_total": 2198.0, "debug/num_tok_loss": 1676.0, "debug/num_tok_total": 2198.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.189212799072266, "train/diffusion_loss": 0.4262164533138275 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2169.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2169.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.141818046569824, "train/diffusion_loss": 0.45898202061653137 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1704.0, "debug/num_lat_total": 2325.0, "debug/num_tok_loss": 1704.0, "debug/num_tok_total": 2325.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.170629501342773, "train/diffusion_loss": 0.4151637852191925 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1750.0, "debug/num_lat_total": 2158.0, "debug/num_tok_loss": 1750.0, "debug/num_tok_total": 2158.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.186365127563477, "train/diffusion_loss": 0.4442407190799713 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2162.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2162.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.155019760131836, "train/diffusion_loss": 0.49728766083717346 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1750.0, "debug/num_lat_total": 2237.0, "debug/num_tok_loss": 1750.0, "debug/num_tok_total": 2237.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.148735046386719, "train/diffusion_loss": 0.4589671492576599 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1637.0, "debug/num_lat_total": 2085.0, "debug/num_tok_loss": 1637.0, "debug/num_tok_total": 2085.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.126914024353027, "train/diffusion_loss": 0.4655221104621887 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2346.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2346.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.135104179382324, "train/diffusion_loss": 0.4546721279621124 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1778.0, "debug/num_lat_total": 2127.0, "debug/num_tok_loss": 1778.0, "debug/num_tok_total": 2127.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.196648597717285, "train/diffusion_loss": 0.5008854269981384 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1815.0, "debug/num_lat_total": 2265.0, "debug/num_tok_loss": 1815.0, "debug/num_tok_total": 2265.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.170491218566895, "train/diffusion_loss": 0.4510475695133209 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1678.0, "debug/num_lat_total": 2084.0, "debug/num_tok_loss": 1678.0, "debug/num_tok_total": 2084.0, "epoch": 0.4485981308411215, "step": 3 }, { "epoch": 0.4485981308411215, "step": 3, "train/ce_loss": 12.16767406463623, "train/diffusion_loss": 0.46289849281311035 }, { "epoch": 0.4485981308411215, "step": 3, "train/learning_rate_real": 2.494339903216356e-05 }, { "debug/num_lat_loss": 1614.0, "debug/num_lat_total": 2067.0, "debug/num_tok_loss": 1614.0, "debug/num_tok_total": 2067.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.186376571655273, "train/diffusion_loss": 0.44896766543388367 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1755.0, "debug/num_lat_total": 2264.0, "debug/num_tok_loss": 1755.0, "debug/num_tok_total": 2264.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.194488525390625, "train/diffusion_loss": 0.45699965953826904 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2208.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2208.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.146739959716797, "train/diffusion_loss": 0.4560658037662506 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1722.0, "debug/num_lat_total": 2150.0, "debug/num_tok_loss": 1722.0, "debug/num_tok_total": 2150.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.168990135192871, "train/diffusion_loss": 0.4327857792377472 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1701.0, "debug/num_lat_total": 2325.0, "debug/num_tok_loss": 1701.0, "debug/num_tok_total": 2325.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.165539741516113, "train/diffusion_loss": 0.4052616059780121 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1773.0, "debug/num_lat_total": 2101.0, "debug/num_tok_loss": 1773.0, "debug/num_tok_total": 2101.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.168839454650879, "train/diffusion_loss": 0.49070754647254944 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1778.0, "debug/num_lat_total": 2235.0, "debug/num_tok_loss": 1778.0, "debug/num_tok_total": 2235.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.132225036621094, "train/diffusion_loss": 0.45063555240631104 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1789.0, "debug/num_lat_total": 2330.0, "debug/num_tok_loss": 1789.0, "debug/num_tok_total": 2330.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.154467582702637, "train/diffusion_loss": 0.44124725461006165 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1690.0, "debug/num_lat_total": 2242.0, "debug/num_tok_loss": 1690.0, "debug/num_tok_total": 2242.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.173988342285156, "train/diffusion_loss": 0.4344332814216614 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1665.0, "debug/num_lat_total": 2143.0, "debug/num_tok_loss": 1665.0, "debug/num_tok_total": 2143.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.202644348144531, "train/diffusion_loss": 0.4590062201023102 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1742.0, "debug/num_lat_total": 2099.0, "debug/num_tok_loss": 1742.0, "debug/num_tok_total": 2099.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.214736938476562, "train/diffusion_loss": 0.4900093674659729 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1803.0, "debug/num_lat_total": 2117.0, "debug/num_tok_loss": 1803.0, "debug/num_tok_total": 2117.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.18010139465332, "train/diffusion_loss": 0.45859843492507935 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2136.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2136.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.151824951171875, "train/diffusion_loss": 0.44397464394569397 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1679.0, "debug/num_lat_total": 2231.0, "debug/num_tok_loss": 1679.0, "debug/num_tok_total": 2231.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.15367317199707, "train/diffusion_loss": 0.4424894154071808 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1803.0, "debug/num_lat_total": 2330.0, "debug/num_tok_loss": 1803.0, "debug/num_tok_total": 2330.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.184985160827637, "train/diffusion_loss": 0.4271906018257141 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1714.0, "debug/num_lat_total": 2169.0, "debug/num_tok_loss": 1714.0, "debug/num_tok_total": 2169.0, "epoch": 0.5981308411214953, "step": 4 }, { "epoch": 0.5981308411214953, "step": 4, "train/ce_loss": 12.191471099853516, "train/diffusion_loss": 0.46100130677223206 }, { "epoch": 0.5981308411214953, "step": 4, "train/learning_rate_real": 2.4774108715783832e-05 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 1939.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 1939.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.195037841796875, "train/diffusion_loss": 0.49820560216903687 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 2105.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 2105.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.162631034851074, "train/diffusion_loss": 0.4296850860118866 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2177.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2177.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.171772956848145, "train/diffusion_loss": 0.4458027184009552 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2125.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2125.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.18253231048584, "train/diffusion_loss": 0.4729151427745819 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1759.0, "debug/num_lat_total": 2354.0, "debug/num_tok_loss": 1759.0, "debug/num_tok_total": 2354.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.203909873962402, "train/diffusion_loss": 0.4511413276195526 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1745.0, "debug/num_lat_total": 2019.0, "debug/num_tok_loss": 1745.0, "debug/num_tok_total": 2019.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.175359725952148, "train/diffusion_loss": 0.4547876715660095 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2269.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2269.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.203401565551758, "train/diffusion_loss": 0.44810980558395386 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1765.0, "debug/num_lat_total": 2174.0, "debug/num_tok_loss": 1765.0, "debug/num_tok_total": 2174.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.165059089660645, "train/diffusion_loss": 0.46098846197128296 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1735.0, "debug/num_lat_total": 2087.0, "debug/num_tok_loss": 1735.0, "debug/num_tok_total": 2087.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.127701759338379, "train/diffusion_loss": 0.46769240498542786 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1750.0, "debug/num_lat_total": 2071.0, "debug/num_tok_loss": 1750.0, "debug/num_tok_total": 2071.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.178753852844238, "train/diffusion_loss": 0.4612983465194702 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1777.0, "debug/num_lat_total": 2052.0, "debug/num_tok_loss": 1777.0, "debug/num_tok_total": 2052.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.192585945129395, "train/diffusion_loss": 0.46960434317588806 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1735.0, "debug/num_lat_total": 2097.0, "debug/num_tok_loss": 1735.0, "debug/num_tok_total": 2097.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.17921257019043, "train/diffusion_loss": 0.4624210000038147 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1681.0, "debug/num_lat_total": 2131.0, "debug/num_tok_loss": 1681.0, "debug/num_tok_total": 2131.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.16563606262207, "train/diffusion_loss": 0.45106709003448486 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1811.0, "debug/num_lat_total": 2189.0, "debug/num_tok_loss": 1811.0, "debug/num_tok_total": 2189.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.170101165771484, "train/diffusion_loss": 0.4966118633747101 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1682.0, "debug/num_lat_total": 2156.0, "debug/num_tok_loss": 1682.0, "debug/num_tok_total": 2156.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.169591903686523, "train/diffusion_loss": 0.445074200630188 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1752.0, "debug/num_lat_total": 2327.0, "debug/num_tok_loss": 1752.0, "debug/num_tok_total": 2327.0, "epoch": 0.7476635514018691, "step": 5 }, { "epoch": 0.7476635514018691, "step": 5, "train/ce_loss": 12.20793342590332, "train/diffusion_loss": 0.42772501707077026 }, { "epoch": 0.7476635514018691, "step": 5, "train/learning_rate_real": 2.449366217018122e-05 }, { "debug/num_lat_loss": 1709.0, "debug/num_lat_total": 2042.0, "debug/num_tok_loss": 1709.0, "debug/num_tok_total": 2042.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.166390419006348, "train/diffusion_loss": 0.4637673497200012 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1852.0, "debug/num_lat_total": 2350.0, "debug/num_tok_loss": 1852.0, "debug/num_tok_total": 2350.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.213371276855469, "train/diffusion_loss": 0.4463023245334625 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1697.0, "debug/num_lat_total": 2142.0, "debug/num_tok_loss": 1697.0, "debug/num_tok_total": 2142.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.193582534790039, "train/diffusion_loss": 0.4670042097568512 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2228.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2228.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.215458869934082, "train/diffusion_loss": 0.4344768226146698 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1739.0, "debug/num_lat_total": 2109.0, "debug/num_tok_loss": 1739.0, "debug/num_tok_total": 2109.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.17676067352295, "train/diffusion_loss": 0.4460485279560089 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1736.0, "debug/num_lat_total": 2020.0, "debug/num_tok_loss": 1736.0, "debug/num_tok_total": 2020.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.166215896606445, "train/diffusion_loss": 0.44714999198913574 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1769.0, "debug/num_lat_total": 2321.0, "debug/num_tok_loss": 1769.0, "debug/num_tok_total": 2321.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.184131622314453, "train/diffusion_loss": 0.4322231411933899 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1661.0, "debug/num_lat_total": 2082.0, "debug/num_tok_loss": 1661.0, "debug/num_tok_total": 2082.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.15831470489502, "train/diffusion_loss": 0.44721469283103943 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2121.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2121.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.174907684326172, "train/diffusion_loss": 0.43733832240104675 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2077.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2077.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.199905395507812, "train/diffusion_loss": 0.45236700773239136 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 229.0, "debug/num_lat_total": 229.0, "debug/num_tok_loss": 229.0, "debug/num_tok_total": 229.0, "epoch": 0.897196261682243, "step": 6 }, { "epoch": 0.897196261682243, "step": 6, "train/ce_loss": 12.282599449157715, "train/diffusion_loss": 0.5522232055664062 }, { "epoch": 0.897196261682243, "step": 6, "train/learning_rate_real": 2.4104599162700908e-05 }, { "debug/num_lat_loss": 1699.0, "debug/num_lat_total": 2101.0, "debug/num_tok_loss": 1699.0, "debug/num_tok_total": 2101.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.172179222106934, "train/diffusion_loss": 0.4296949505805969 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 2086.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 2086.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.194772720336914, "train/diffusion_loss": 0.4478878676891327 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1729.0, "debug/num_lat_total": 2374.0, "debug/num_tok_loss": 1729.0, "debug/num_tok_total": 2374.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.205586433410645, "train/diffusion_loss": 0.4039336144924164 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1692.0, "debug/num_lat_total": 2233.0, "debug/num_tok_loss": 1692.0, "debug/num_tok_total": 2233.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.20322322845459, "train/diffusion_loss": 0.4448912441730499 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2252.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2252.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.161945343017578, "train/diffusion_loss": 0.4232192933559418 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1782.0, "debug/num_lat_total": 2246.0, "debug/num_tok_loss": 1782.0, "debug/num_tok_total": 2246.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.166516304016113, "train/diffusion_loss": 0.44215527176856995 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1684.0, "debug/num_lat_total": 2101.0, "debug/num_tok_loss": 1684.0, "debug/num_tok_total": 2101.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.172343254089355, "train/diffusion_loss": 0.46213802695274353 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2286.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2286.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.176877975463867, "train/diffusion_loss": 0.43911412358283997 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1646.0, "debug/num_lat_total": 1761.0, "debug/num_tok_loss": 1646.0, "debug/num_tok_total": 1761.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.188158988952637, "train/diffusion_loss": 0.4860377311706543 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1823.0, "debug/num_lat_total": 2333.0, "debug/num_tok_loss": 1823.0, "debug/num_tok_total": 2333.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.187177658081055, "train/diffusion_loss": 0.43619203567504883 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1817.0, "debug/num_lat_total": 2273.0, "debug/num_tok_loss": 1817.0, "debug/num_tok_total": 2273.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.17230224609375, "train/diffusion_loss": 0.4542900025844574 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1701.0, "debug/num_lat_total": 2256.0, "debug/num_tok_loss": 1701.0, "debug/num_tok_total": 2256.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.149282455444336, "train/diffusion_loss": 0.43968647718429565 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1708.0, "debug/num_lat_total": 2278.0, "debug/num_tok_loss": 1708.0, "debug/num_tok_total": 2278.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.181864738464355, "train/diffusion_loss": 0.433378666639328 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1808.0, "debug/num_lat_total": 2109.0, "debug/num_tok_loss": 1808.0, "debug/num_tok_total": 2109.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.208248138427734, "train/diffusion_loss": 0.45877185463905334 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1655.0, "debug/num_lat_total": 2171.0, "debug/num_tok_loss": 1655.0, "debug/num_tok_total": 2171.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.172933578491211, "train/diffusion_loss": 0.4297572076320648 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1760.0, "debug/num_lat_total": 2230.0, "debug/num_tok_loss": 1760.0, "debug/num_tok_total": 2230.0, "epoch": 1.0, "step": 7 }, { "epoch": 1.0, "step": 7, "train/ce_loss": 12.13405990600586, "train/diffusion_loss": 0.4668017029762268 }, { "epoch": 1.0, "step": 7, "train/learning_rate_real": 2.3610443108186546e-05 }, { "debug/num_lat_loss": 1678.0, "debug/num_lat_total": 2256.0, "debug/num_tok_loss": 1678.0, "debug/num_tok_total": 2256.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.185417175292969, "train/diffusion_loss": 0.4371003806591034 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2139.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2139.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.221545219421387, "train/diffusion_loss": 0.4711552560329437 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2145.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2145.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.16722583770752, "train/diffusion_loss": 0.46221664547920227 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1871.0, "debug/num_lat_total": 2412.0, "debug/num_tok_loss": 1871.0, "debug/num_tok_total": 2412.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.183208465576172, "train/diffusion_loss": 0.42312583327293396 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2357.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2357.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.181570053100586, "train/diffusion_loss": 0.4198269248008728 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1691.0, "debug/num_lat_total": 2096.0, "debug/num_tok_loss": 1691.0, "debug/num_tok_total": 2096.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.1736421585083, "train/diffusion_loss": 0.4673723876476288 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1703.0, "debug/num_lat_total": 2182.0, "debug/num_tok_loss": 1703.0, "debug/num_tok_total": 2182.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.155488014221191, "train/diffusion_loss": 0.4332273304462433 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2212.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2212.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.16242504119873, "train/diffusion_loss": 0.44073769450187683 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1707.0, "debug/num_lat_total": 2084.0, "debug/num_tok_loss": 1707.0, "debug/num_tok_total": 2084.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.175156593322754, "train/diffusion_loss": 0.4498469829559326 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1635.0, "debug/num_lat_total": 1945.0, "debug/num_tok_loss": 1635.0, "debug/num_tok_total": 1945.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.206686019897461, "train/diffusion_loss": 0.46417516469955444 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1735.0, "debug/num_lat_total": 2205.0, "debug/num_tok_loss": 1735.0, "debug/num_tok_total": 2205.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.191603660583496, "train/diffusion_loss": 0.4583619236946106 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2142.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2142.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.173551559448242, "train/diffusion_loss": 0.47600993514060974 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2214.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2214.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.174034118652344, "train/diffusion_loss": 0.4496922194957733 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2097.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2097.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.169427871704102, "train/diffusion_loss": 0.46032634377479553 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1720.0, "debug/num_lat_total": 2218.0, "debug/num_tok_loss": 1720.0, "debug/num_tok_total": 2218.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.195882797241211, "train/diffusion_loss": 0.4113229513168335 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1775.0, "debug/num_lat_total": 2160.0, "debug/num_tok_loss": 1775.0, "debug/num_tok_total": 2160.0, "epoch": 1.1495327102803738, "step": 8 }, { "epoch": 1.1495327102803738, "step": 8, "train/ce_loss": 12.156371116638184, "train/diffusion_loss": 0.4266815781593323 }, { "epoch": 1.1495327102803738, "step": 8, "train/learning_rate_real": 2.3015669160389767e-05 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2141.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2141.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.152505874633789, "train/diffusion_loss": 0.46556028723716736 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1801.0, "debug/num_lat_total": 2340.0, "debug/num_tok_loss": 1801.0, "debug/num_tok_total": 2340.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.16299819946289, "train/diffusion_loss": 0.42512306571006775 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2226.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2226.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.169912338256836, "train/diffusion_loss": 0.4332532584667206 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1699.0, "debug/num_lat_total": 2058.0, "debug/num_tok_loss": 1699.0, "debug/num_tok_total": 2058.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.193463325500488, "train/diffusion_loss": 0.466909795999527 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1696.0, "debug/num_lat_total": 2215.0, "debug/num_tok_loss": 1696.0, "debug/num_tok_total": 2215.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.178363800048828, "train/diffusion_loss": 0.4238511621952057 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1704.0, "debug/num_lat_total": 2075.0, "debug/num_tok_loss": 1704.0, "debug/num_tok_total": 2075.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.177104949951172, "train/diffusion_loss": 0.4522017538547516 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2164.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2164.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.171281814575195, "train/diffusion_loss": 0.45279401540756226 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1785.0, "debug/num_lat_total": 2090.0, "debug/num_tok_loss": 1785.0, "debug/num_tok_total": 2090.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.184865951538086, "train/diffusion_loss": 0.47387492656707764 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1716.0, "debug/num_lat_total": 2273.0, "debug/num_tok_loss": 1716.0, "debug/num_tok_total": 2273.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.163240432739258, "train/diffusion_loss": 0.4024707078933716 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1820.0, "debug/num_lat_total": 2160.0, "debug/num_tok_loss": 1820.0, "debug/num_tok_total": 2160.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.185773849487305, "train/diffusion_loss": 0.4599778354167938 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1792.0, "debug/num_lat_total": 2318.0, "debug/num_tok_loss": 1792.0, "debug/num_tok_total": 2318.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.170687675476074, "train/diffusion_loss": 0.4315679967403412 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1769.0, "debug/num_lat_total": 2129.0, "debug/num_tok_loss": 1769.0, "debug/num_tok_total": 2129.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.157472610473633, "train/diffusion_loss": 0.4691459834575653 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 1996.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 1996.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.206344604492188, "train/diffusion_loss": 0.47081127762794495 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1730.0, "debug/num_lat_total": 2146.0, "debug/num_tok_loss": 1730.0, "debug/num_tok_total": 2146.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.170424461364746, "train/diffusion_loss": 0.4682280719280243 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1785.0, "debug/num_lat_total": 2218.0, "debug/num_tok_loss": 1785.0, "debug/num_tok_total": 2218.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.194655418395996, "train/diffusion_loss": 0.4655971825122833 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "debug/num_lat_loss": 1816.0, "debug/num_lat_total": 2312.0, "debug/num_tok_loss": 1816.0, "debug/num_tok_total": 2312.0, "epoch": 1.2990654205607477, "step": 9 }, { "epoch": 1.2990654205607477, "step": 9, "train/ce_loss": 12.17812728881836, "train/diffusion_loss": 0.4462972581386566 }, { "epoch": 1.2990654205607477, "step": 9, "train/learning_rate_real": 2.2325663684284844e-05 }, { "epoch": 1.4485981308411215, "grad_norm": 0.10791015625, "learning_rate": 2.2325663684284844e-05, "loss": 1.1205, "step": 10 }, { "debug/num_lat_loss": 1693.0, "debug/num_lat_total": 2185.0, "debug/num_tok_loss": 1693.0, "debug/num_tok_total": 2185.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.169471740722656, "train/diffusion_loss": 0.4354258179664612 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1766.0, "debug/num_lat_total": 2349.0, "debug/num_tok_loss": 1766.0, "debug/num_tok_total": 2349.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.206490516662598, "train/diffusion_loss": 0.39133599400520325 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1719.0, "debug/num_lat_total": 2088.0, "debug/num_tok_loss": 1719.0, "debug/num_tok_total": 2088.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.198739051818848, "train/diffusion_loss": 0.4691934883594513 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1711.0, "debug/num_lat_total": 2242.0, "debug/num_tok_loss": 1711.0, "debug/num_tok_total": 2242.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.150611877441406, "train/diffusion_loss": 0.4412599205970764 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1795.0, "debug/num_lat_total": 2284.0, "debug/num_tok_loss": 1795.0, "debug/num_tok_total": 2284.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.177535057067871, "train/diffusion_loss": 0.41242048144340515 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2231.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2231.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.17111873626709, "train/diffusion_loss": 0.43765971064567566 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1726.0, "debug/num_lat_total": 2153.0, "debug/num_tok_loss": 1726.0, "debug/num_tok_total": 2153.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.200366973876953, "train/diffusion_loss": 0.4417407512664795 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1805.0, "debug/num_lat_total": 2263.0, "debug/num_tok_loss": 1805.0, "debug/num_tok_total": 2263.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.149072647094727, "train/diffusion_loss": 0.4602048397064209 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1811.0, "debug/num_lat_total": 2264.0, "debug/num_tok_loss": 1811.0, "debug/num_tok_total": 2264.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.151084899902344, "train/diffusion_loss": 0.4598139822483063 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1795.0, "debug/num_lat_total": 2296.0, "debug/num_tok_loss": 1795.0, "debug/num_tok_total": 2296.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.177088737487793, "train/diffusion_loss": 0.4397522211074829 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1757.0, "debug/num_lat_total": 2316.0, "debug/num_tok_loss": 1757.0, "debug/num_tok_total": 2316.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.180797576904297, "train/diffusion_loss": 0.46918240189552307 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1710.0, "debug/num_lat_total": 2171.0, "debug/num_tok_loss": 1710.0, "debug/num_tok_total": 2171.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.182649612426758, "train/diffusion_loss": 0.44089797139167786 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1830.0, "debug/num_lat_total": 2244.0, "debug/num_tok_loss": 1830.0, "debug/num_tok_total": 2244.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.196844100952148, "train/diffusion_loss": 0.43193933367729187 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1700.0, "debug/num_lat_total": 2234.0, "debug/num_tok_loss": 1700.0, "debug/num_tok_total": 2234.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.192126274108887, "train/diffusion_loss": 0.44308194518089294 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1675.0, "debug/num_lat_total": 2107.0, "debug/num_tok_loss": 1675.0, "debug/num_tok_total": 2107.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.138769149780273, "train/diffusion_loss": 0.43758758902549744 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1729.0, "debug/num_lat_total": 2221.0, "debug/num_tok_loss": 1729.0, "debug/num_tok_total": 2221.0, "epoch": 1.4485981308411215, "step": 10 }, { "epoch": 1.4485981308411215, "step": 10, "train/ce_loss": 12.164264678955078, "train/diffusion_loss": 0.4479159712791443 }, { "epoch": 1.4485981308411215, "step": 10, "train/learning_rate_real": 2.154667547631338e-05 }, { "debug/num_lat_loss": 1773.0, "debug/num_lat_total": 2226.0, "debug/num_tok_loss": 1773.0, "debug/num_tok_total": 2226.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.166282653808594, "train/diffusion_loss": 0.45015960931777954 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1780.0, "debug/num_lat_total": 2155.0, "debug/num_tok_loss": 1780.0, "debug/num_tok_total": 2155.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.18748950958252, "train/diffusion_loss": 0.46807411313056946 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1712.0, "debug/num_lat_total": 2075.0, "debug/num_tok_loss": 1712.0, "debug/num_tok_total": 2075.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.184382438659668, "train/diffusion_loss": 0.45887696743011475 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1835.0, "debug/num_lat_total": 2278.0, "debug/num_tok_loss": 1835.0, "debug/num_tok_total": 2278.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.13071060180664, "train/diffusion_loss": 0.4673099219799042 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2127.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2127.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.159421920776367, "train/diffusion_loss": 0.44304782152175903 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1729.0, "debug/num_lat_total": 2149.0, "debug/num_tok_loss": 1729.0, "debug/num_tok_total": 2149.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.162511825561523, "train/diffusion_loss": 0.4454917013645172 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1803.0, "debug/num_lat_total": 2170.0, "debug/num_tok_loss": 1803.0, "debug/num_tok_total": 2170.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.184347152709961, "train/diffusion_loss": 0.4569142460823059 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1758.0, "debug/num_lat_total": 2278.0, "debug/num_tok_loss": 1758.0, "debug/num_tok_total": 2278.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.191046714782715, "train/diffusion_loss": 0.4136866629123688 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1779.0, "debug/num_lat_total": 2387.0, "debug/num_tok_loss": 1779.0, "debug/num_tok_total": 2387.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.171055793762207, "train/diffusion_loss": 0.4274556338787079 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1735.0, "debug/num_lat_total": 2052.0, "debug/num_tok_loss": 1735.0, "debug/num_tok_total": 2052.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.187844276428223, "train/diffusion_loss": 0.4536263048648834 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1751.0, "debug/num_lat_total": 2068.0, "debug/num_tok_loss": 1751.0, "debug/num_tok_total": 2068.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.170848846435547, "train/diffusion_loss": 0.4488384425640106 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2149.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2149.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.222294807434082, "train/diffusion_loss": 0.4231165945529938 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1672.0, "debug/num_lat_total": 2265.0, "debug/num_tok_loss": 1672.0, "debug/num_tok_total": 2265.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.197094917297363, "train/diffusion_loss": 0.42200806736946106 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1809.0, "debug/num_lat_total": 2274.0, "debug/num_tok_loss": 1809.0, "debug/num_tok_total": 2274.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.177701950073242, "train/diffusion_loss": 0.4552193582057953 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1671.0, "debug/num_lat_total": 2181.0, "debug/num_tok_loss": 1671.0, "debug/num_tok_total": 2181.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.13666820526123, "train/diffusion_loss": 0.419909805059433 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2254.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2254.0, "epoch": 1.5981308411214954, "step": 11 }, { "epoch": 1.5981308411214954, "step": 11, "train/ce_loss": 12.172820091247559, "train/diffusion_loss": 0.43515339493751526 }, { "epoch": 1.5981308411214954, "step": 11, "train/learning_rate_real": 2.0685759174316066e-05 }, { "debug/num_lat_loss": 1760.0, "debug/num_lat_total": 2127.0, "debug/num_tok_loss": 1760.0, "debug/num_tok_total": 2127.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.22167682647705, "train/diffusion_loss": 0.4806464910507202 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1802.0, "debug/num_lat_total": 2147.0, "debug/num_tok_loss": 1802.0, "debug/num_tok_total": 2147.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.198952674865723, "train/diffusion_loss": 0.4417625069618225 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1730.0, "debug/num_lat_total": 2123.0, "debug/num_tok_loss": 1730.0, "debug/num_tok_total": 2123.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.184418678283691, "train/diffusion_loss": 0.45918792486190796 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1656.0, "debug/num_lat_total": 1983.0, "debug/num_tok_loss": 1656.0, "debug/num_tok_total": 1983.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.202133178710938, "train/diffusion_loss": 0.4891001284122467 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1718.0, "debug/num_lat_total": 2212.0, "debug/num_tok_loss": 1718.0, "debug/num_tok_total": 2212.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.170212745666504, "train/diffusion_loss": 0.4450775682926178 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1697.0, "debug/num_lat_total": 2134.0, "debug/num_tok_loss": 1697.0, "debug/num_tok_total": 2134.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.188855171203613, "train/diffusion_loss": 0.4286879301071167 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1689.0, "debug/num_lat_total": 2269.0, "debug/num_tok_loss": 1689.0, "debug/num_tok_total": 2269.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.180878639221191, "train/diffusion_loss": 0.3961124122142792 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2199.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2199.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.146210670471191, "train/diffusion_loss": 0.458207905292511 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 2336.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 2336.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.17103099822998, "train/diffusion_loss": 0.41296547651290894 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1691.0, "debug/num_lat_total": 2167.0, "debug/num_tok_loss": 1691.0, "debug/num_tok_total": 2167.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.197969436645508, "train/diffusion_loss": 0.44860365986824036 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1751.0, "debug/num_lat_total": 2084.0, "debug/num_tok_loss": 1751.0, "debug/num_tok_total": 2084.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.17561149597168, "train/diffusion_loss": 0.4686024487018585 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1750.0, "debug/num_lat_total": 2188.0, "debug/num_tok_loss": 1750.0, "debug/num_tok_total": 2188.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.174299240112305, "train/diffusion_loss": 0.45362716913223267 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2347.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2347.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.163989067077637, "train/diffusion_loss": 0.40822833776474 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1764.0, "debug/num_lat_total": 2213.0, "debug/num_tok_loss": 1764.0, "debug/num_tok_total": 2213.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.16737174987793, "train/diffusion_loss": 0.437155544757843 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2335.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2335.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.159972190856934, "train/diffusion_loss": 0.4289741516113281 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1694.0, "debug/num_lat_total": 2192.0, "debug/num_tok_loss": 1694.0, "debug/num_tok_total": 2192.0, "epoch": 1.7476635514018692, "step": 12 }, { "epoch": 1.7476635514018692, "step": 12, "train/ce_loss": 12.163084030151367, "train/diffusion_loss": 0.4414047300815582 }, { "epoch": 1.7476635514018692, "step": 12, "train/learning_rate_real": 1.9750711369639978e-05 }, { "debug/num_lat_loss": 1726.0, "debug/num_lat_total": 2200.0, "debug/num_tok_loss": 1726.0, "debug/num_tok_total": 2200.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.161957740783691, "train/diffusion_loss": 0.4361889362335205 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1617.0, "debug/num_lat_total": 2198.0, "debug/num_tok_loss": 1617.0, "debug/num_tok_total": 2198.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.193390846252441, "train/diffusion_loss": 0.4090780019760132 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1813.0, "debug/num_lat_total": 2269.0, "debug/num_tok_loss": 1813.0, "debug/num_tok_total": 2269.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.186473846435547, "train/diffusion_loss": 0.45172038674354553 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1796.0, "debug/num_lat_total": 2229.0, "debug/num_tok_loss": 1796.0, "debug/num_tok_total": 2229.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.181390762329102, "train/diffusion_loss": 0.43670597672462463 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1712.0, "debug/num_lat_total": 2176.0, "debug/num_tok_loss": 1712.0, "debug/num_tok_total": 2176.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.185544967651367, "train/diffusion_loss": 0.4369116425514221 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1784.0, "debug/num_lat_total": 2337.0, "debug/num_tok_loss": 1784.0, "debug/num_tok_total": 2337.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.142572402954102, "train/diffusion_loss": 0.4199765920639038 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1742.0, "debug/num_lat_total": 2196.0, "debug/num_tok_loss": 1742.0, "debug/num_tok_total": 2196.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.183996200561523, "train/diffusion_loss": 0.4249250292778015 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1725.0, "debug/num_lat_total": 2215.0, "debug/num_tok_loss": 1725.0, "debug/num_tok_total": 2215.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.17967700958252, "train/diffusion_loss": 0.4066171944141388 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1690.0, "debug/num_lat_total": 2162.0, "debug/num_tok_loss": 1690.0, "debug/num_tok_total": 2162.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.194439888000488, "train/diffusion_loss": 0.45021936297416687 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1711.0, "debug/num_lat_total": 2092.0, "debug/num_tok_loss": 1711.0, "debug/num_tok_total": 2092.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.181828498840332, "train/diffusion_loss": 0.4418796896934509 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 243.0, "debug/num_lat_total": 341.0, "debug/num_tok_loss": 243.0, "debug/num_tok_total": 341.0, "epoch": 1.897196261682243, "step": 13 }, { "epoch": 1.897196261682243, "step": 13, "train/ce_loss": 12.189529418945312, "train/diffusion_loss": 0.4245636463165283 }, { "epoch": 1.897196261682243, "step": 13, "train/learning_rate_real": 1.8750000000000002e-05 }, { "debug/num_lat_loss": 1713.0, "debug/num_lat_total": 2182.0, "debug/num_tok_loss": 1713.0, "debug/num_tok_total": 2182.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.153608322143555, "train/diffusion_loss": 0.4352942705154419 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1792.0, "debug/num_lat_total": 2291.0, "debug/num_tok_loss": 1792.0, "debug/num_tok_total": 2291.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.172529220581055, "train/diffusion_loss": 0.41673797369003296 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1818.0, "debug/num_lat_total": 2261.0, "debug/num_tok_loss": 1818.0, "debug/num_tok_total": 2261.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.160377502441406, "train/diffusion_loss": 0.44119855761528015 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 2162.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 2162.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.186813354492188, "train/diffusion_loss": 0.41701310873031616 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1789.0, "debug/num_lat_total": 2174.0, "debug/num_tok_loss": 1789.0, "debug/num_tok_total": 2174.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.174489974975586, "train/diffusion_loss": 0.45595914125442505 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2278.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2278.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.182331085205078, "train/diffusion_loss": 0.4117718040943146 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1755.0, "debug/num_lat_total": 2268.0, "debug/num_tok_loss": 1755.0, "debug/num_tok_total": 2268.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.161623001098633, "train/diffusion_loss": 0.4075234830379486 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1648.0, "debug/num_lat_total": 2062.0, "debug/num_tok_loss": 1648.0, "debug/num_tok_total": 2062.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.20829963684082, "train/diffusion_loss": 0.4243350625038147 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1684.0, "debug/num_lat_total": 2065.0, "debug/num_tok_loss": 1684.0, "debug/num_tok_total": 2065.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.194366455078125, "train/diffusion_loss": 0.4790455102920532 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 2258.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 2258.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.151714324951172, "train/diffusion_loss": 0.4375750422477722 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1714.0, "debug/num_lat_total": 2035.0, "debug/num_tok_loss": 1714.0, "debug/num_tok_total": 2035.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.17888355255127, "train/diffusion_loss": 0.48229870200157166 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1791.0, "debug/num_lat_total": 2202.0, "debug/num_tok_loss": 1791.0, "debug/num_tok_total": 2202.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.186660766601562, "train/diffusion_loss": 0.44068536162376404 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1718.0, "debug/num_lat_total": 2154.0, "debug/num_tok_loss": 1718.0, "debug/num_tok_total": 2154.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.159738540649414, "train/diffusion_loss": 0.4153040647506714 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2189.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2189.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.197320938110352, "train/diffusion_loss": 0.438213586807251 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1695.0, "debug/num_lat_total": 2245.0, "debug/num_tok_loss": 1695.0, "debug/num_tok_total": 2245.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.16823673248291, "train/diffusion_loss": 0.41748422384262085 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1803.0, "debug/num_lat_total": 2096.0, "debug/num_tok_loss": 1803.0, "debug/num_tok_total": 2096.0, "epoch": 2.0, "step": 14 }, { "epoch": 2.0, "step": 14, "train/ce_loss": 12.14641284942627, "train/diffusion_loss": 0.4777478277683258 }, { "epoch": 2.0, "step": 14, "train/learning_rate_real": 1.7692687662523583e-05 }, { "debug/num_lat_loss": 1804.0, "debug/num_lat_total": 2239.0, "debug/num_tok_loss": 1804.0, "debug/num_tok_total": 2239.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.182928085327148, "train/diffusion_loss": 0.4475792646408081 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1797.0, "debug/num_lat_total": 2358.0, "debug/num_tok_loss": 1797.0, "debug/num_tok_total": 2358.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.155641555786133, "train/diffusion_loss": 0.41666021943092346 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1727.0, "debug/num_lat_total": 2232.0, "debug/num_tok_loss": 1727.0, "debug/num_tok_total": 2232.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.173539161682129, "train/diffusion_loss": 0.45217418670654297 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1698.0, "debug/num_lat_total": 2181.0, "debug/num_tok_loss": 1698.0, "debug/num_tok_total": 2181.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.21281623840332, "train/diffusion_loss": 0.44602900743484497 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1759.0, "debug/num_lat_total": 2135.0, "debug/num_tok_loss": 1759.0, "debug/num_tok_total": 2135.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.174015998840332, "train/diffusion_loss": 0.4635976552963257 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1727.0, "debug/num_lat_total": 2196.0, "debug/num_tok_loss": 1727.0, "debug/num_tok_total": 2196.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.1640625, "train/diffusion_loss": 0.4420984983444214 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1758.0, "debug/num_lat_total": 2018.0, "debug/num_tok_loss": 1758.0, "debug/num_tok_total": 2018.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.15977668762207, "train/diffusion_loss": 0.4814409911632538 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1678.0, "debug/num_lat_total": 2058.0, "debug/num_tok_loss": 1678.0, "debug/num_tok_total": 2058.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.213717460632324, "train/diffusion_loss": 0.4840865135192871 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1840.0, "debug/num_lat_total": 2459.0, "debug/num_tok_loss": 1840.0, "debug/num_tok_total": 2459.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.171710968017578, "train/diffusion_loss": 0.43757733702659607 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1760.0, "debug/num_lat_total": 2225.0, "debug/num_tok_loss": 1760.0, "debug/num_tok_total": 2225.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.18978214263916, "train/diffusion_loss": 0.44430798292160034 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1721.0, "debug/num_lat_total": 2157.0, "debug/num_tok_loss": 1721.0, "debug/num_tok_total": 2157.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.193751335144043, "train/diffusion_loss": 0.4408707916736603 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1709.0, "debug/num_lat_total": 2245.0, "debug/num_tok_loss": 1709.0, "debug/num_tok_total": 2245.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.183303833007812, "train/diffusion_loss": 0.4226411283016205 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1795.0, "debug/num_lat_total": 2263.0, "debug/num_tok_loss": 1795.0, "debug/num_tok_total": 2263.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.184673309326172, "train/diffusion_loss": 0.4556589722633362 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1673.0, "debug/num_lat_total": 2082.0, "debug/num_tok_loss": 1673.0, "debug/num_tok_total": 2082.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.158052444458008, "train/diffusion_loss": 0.4358730912208557 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1708.0, "debug/num_lat_total": 2268.0, "debug/num_tok_loss": 1708.0, "debug/num_tok_total": 2268.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.191332817077637, "train/diffusion_loss": 0.4384799599647522 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1706.0, "debug/num_lat_total": 2108.0, "debug/num_tok_loss": 1706.0, "debug/num_tok_total": 2108.0, "epoch": 2.149532710280374, "step": 15 }, { "epoch": 2.149532710280374, "step": 15, "train/ce_loss": 12.224422454833984, "train/diffusion_loss": 0.44930553436279297 }, { "epoch": 2.149532710280374, "step": 15, "train/learning_rate_real": 1.6588349541467772e-05 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2080.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2080.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.148018836975098, "train/diffusion_loss": 0.48873013257980347 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1709.0, "debug/num_lat_total": 2070.0, "debug/num_tok_loss": 1709.0, "debug/num_tok_total": 2070.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.192659378051758, "train/diffusion_loss": 0.453331857919693 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1769.0, "debug/num_lat_total": 2275.0, "debug/num_tok_loss": 1769.0, "debug/num_tok_total": 2275.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.193303108215332, "train/diffusion_loss": 0.43189382553100586 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1780.0, "debug/num_lat_total": 2346.0, "debug/num_tok_loss": 1780.0, "debug/num_tok_total": 2346.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.181623458862305, "train/diffusion_loss": 0.4424782991409302 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2180.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2180.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.204513549804688, "train/diffusion_loss": 0.4519851803779602 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1695.0, "debug/num_lat_total": 2054.0, "debug/num_tok_loss": 1695.0, "debug/num_tok_total": 2054.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.146162033081055, "train/diffusion_loss": 0.4398154020309448 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1761.0, "debug/num_lat_total": 2290.0, "debug/num_tok_loss": 1761.0, "debug/num_tok_total": 2290.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.200496673583984, "train/diffusion_loss": 0.4298996031284332 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1779.0, "debug/num_lat_total": 2133.0, "debug/num_tok_loss": 1779.0, "debug/num_tok_total": 2133.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.160360336303711, "train/diffusion_loss": 0.4252331554889679 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1761.0, "debug/num_lat_total": 2142.0, "debug/num_tok_loss": 1761.0, "debug/num_tok_total": 2142.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.186250686645508, "train/diffusion_loss": 0.4651545584201813 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2243.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2243.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.174160957336426, "train/diffusion_loss": 0.4075007140636444 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1712.0, "debug/num_lat_total": 2172.0, "debug/num_tok_loss": 1712.0, "debug/num_tok_total": 2172.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.175134658813477, "train/diffusion_loss": 0.42257148027420044 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1671.0, "debug/num_lat_total": 2062.0, "debug/num_tok_loss": 1671.0, "debug/num_tok_total": 2062.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.141824722290039, "train/diffusion_loss": 0.470732718706131 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2233.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2233.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.187322616577148, "train/diffusion_loss": 0.4473858177661896 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2375.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2375.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.16384506225586, "train/diffusion_loss": 0.4119928479194641 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2268.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2268.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.176751136779785, "train/diffusion_loss": 0.4215977191925049 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1763.0, "debug/num_lat_total": 2374.0, "debug/num_tok_loss": 1763.0, "debug/num_tok_total": 2374.0, "epoch": 2.2990654205607477, "step": 16 }, { "epoch": 2.2990654205607477, "step": 16, "train/ce_loss": 12.189051628112793, "train/diffusion_loss": 0.40282106399536133 }, { "epoch": 2.2990654205607477, "step": 16, "train/learning_rate_real": 1.5446986693867843e-05 }, { "debug/num_lat_loss": 1764.0, "debug/num_lat_total": 2167.0, "debug/num_tok_loss": 1764.0, "debug/num_tok_total": 2167.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.176045417785645, "train/diffusion_loss": 0.45012861490249634 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 1927.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 1927.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.193452835083008, "train/diffusion_loss": 0.4965061545372009 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1691.0, "debug/num_lat_total": 2287.0, "debug/num_tok_loss": 1691.0, "debug/num_tok_total": 2287.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.18310260772705, "train/diffusion_loss": 0.42198505997657776 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1744.0, "debug/num_lat_total": 2118.0, "debug/num_tok_loss": 1744.0, "debug/num_tok_total": 2118.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.154280662536621, "train/diffusion_loss": 0.4763759672641754 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1775.0, "debug/num_lat_total": 2223.0, "debug/num_tok_loss": 1775.0, "debug/num_tok_total": 2223.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.169533729553223, "train/diffusion_loss": 0.4356708228588104 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1777.0, "debug/num_lat_total": 2186.0, "debug/num_tok_loss": 1777.0, "debug/num_tok_total": 2186.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.186701774597168, "train/diffusion_loss": 0.44108399748802185 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2174.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2174.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.183830261230469, "train/diffusion_loss": 0.4281693398952484 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1764.0, "debug/num_lat_total": 2347.0, "debug/num_tok_loss": 1764.0, "debug/num_tok_total": 2347.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.176342964172363, "train/diffusion_loss": 0.43779289722442627 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2054.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2054.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.179503440856934, "train/diffusion_loss": 0.4560769498348236 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1712.0, "debug/num_lat_total": 2106.0, "debug/num_tok_loss": 1712.0, "debug/num_tok_total": 2106.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.209382057189941, "train/diffusion_loss": 0.442518413066864 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1709.0, "debug/num_lat_total": 1990.0, "debug/num_tok_loss": 1709.0, "debug/num_tok_total": 1990.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.172338485717773, "train/diffusion_loss": 0.4518572986125946 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1696.0, "debug/num_lat_total": 2036.0, "debug/num_tok_loss": 1696.0, "debug/num_tok_total": 2036.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.185693740844727, "train/diffusion_loss": 0.43408462405204773 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1798.0, "debug/num_lat_total": 2274.0, "debug/num_tok_loss": 1798.0, "debug/num_tok_total": 2274.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.175325393676758, "train/diffusion_loss": 0.4459254741668701 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1704.0, "debug/num_lat_total": 2057.0, "debug/num_tok_loss": 1704.0, "debug/num_tok_total": 2057.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.171283721923828, "train/diffusion_loss": 0.444389671087265 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2073.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2073.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.175067901611328, "train/diffusion_loss": 0.5100155472755432 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2309.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2309.0, "epoch": 2.4485981308411215, "step": 17 }, { "epoch": 2.4485981308411215, "step": 17, "train/ce_loss": 12.165181159973145, "train/diffusion_loss": 0.4261295795440674 }, { "epoch": 2.4485981308411215, "step": 17, "train/learning_rate_real": 1.4278935478416066e-05 }, { "debug/num_lat_loss": 1790.0, "debug/num_lat_total": 2235.0, "debug/num_tok_loss": 1790.0, "debug/num_tok_total": 2235.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.180500030517578, "train/diffusion_loss": 0.4695003032684326 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1726.0, "debug/num_lat_total": 2267.0, "debug/num_tok_loss": 1726.0, "debug/num_tok_total": 2267.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.168024063110352, "train/diffusion_loss": 0.44884082674980164 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1786.0, "debug/num_lat_total": 2243.0, "debug/num_tok_loss": 1786.0, "debug/num_tok_total": 2243.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.177437782287598, "train/diffusion_loss": 0.43809157609939575 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2247.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2247.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.172117233276367, "train/diffusion_loss": 0.45602065324783325 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2294.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2294.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.180341720581055, "train/diffusion_loss": 0.41540735960006714 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1786.0, "debug/num_lat_total": 2094.0, "debug/num_tok_loss": 1786.0, "debug/num_tok_total": 2094.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.180994033813477, "train/diffusion_loss": 0.44258132576942444 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2205.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2205.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.193119049072266, "train/diffusion_loss": 0.4508376717567444 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1663.0, "debug/num_lat_total": 2067.0, "debug/num_tok_loss": 1663.0, "debug/num_tok_total": 2067.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.170428276062012, "train/diffusion_loss": 0.4963611662387848 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2262.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2262.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.17246150970459, "train/diffusion_loss": 0.4106256365776062 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1689.0, "debug/num_lat_total": 2285.0, "debug/num_tok_loss": 1689.0, "debug/num_tok_total": 2285.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.178051948547363, "train/diffusion_loss": 0.4075928330421448 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1792.0, "debug/num_lat_total": 2185.0, "debug/num_tok_loss": 1792.0, "debug/num_tok_total": 2185.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.185931205749512, "train/diffusion_loss": 0.43419307470321655 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2252.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2252.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.168244361877441, "train/diffusion_loss": 0.4210459291934967 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1751.0, "debug/num_lat_total": 2186.0, "debug/num_tok_loss": 1751.0, "debug/num_tok_total": 2186.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.163827896118164, "train/diffusion_loss": 0.43450531363487244 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1805.0, "debug/num_lat_total": 2312.0, "debug/num_tok_loss": 1805.0, "debug/num_tok_total": 2312.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.179530143737793, "train/diffusion_loss": 0.44190558791160583 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1823.0, "debug/num_lat_total": 2209.0, "debug/num_tok_loss": 1823.0, "debug/num_tok_total": 2209.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.182696342468262, "train/diffusion_loss": 0.49013906717300415 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1720.0, "debug/num_lat_total": 2257.0, "debug/num_tok_loss": 1720.0, "debug/num_tok_total": 2257.0, "epoch": 2.5981308411214954, "step": 18 }, { "epoch": 2.5981308411214954, "step": 18, "train/ce_loss": 12.155743598937988, "train/diffusion_loss": 0.4087804853916168 }, { "epoch": 2.5981308411214954, "step": 18, "train/learning_rate_real": 1.3094773947796781e-05 }, { "debug/num_lat_loss": 1648.0, "debug/num_lat_total": 2245.0, "debug/num_tok_loss": 1648.0, "debug/num_tok_total": 2245.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.173835754394531, "train/diffusion_loss": 0.40596216917037964 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1770.0, "debug/num_lat_total": 2270.0, "debug/num_tok_loss": 1770.0, "debug/num_tok_total": 2270.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.166971206665039, "train/diffusion_loss": 0.41651174426078796 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2314.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2314.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.187857627868652, "train/diffusion_loss": 0.41560783982276917 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1764.0, "debug/num_lat_total": 2279.0, "debug/num_tok_loss": 1764.0, "debug/num_tok_total": 2279.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.170059204101562, "train/diffusion_loss": 0.4451139569282532 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1793.0, "debug/num_lat_total": 2246.0, "debug/num_tok_loss": 1793.0, "debug/num_tok_total": 2246.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.177019119262695, "train/diffusion_loss": 0.45224013924598694 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1628.0, "debug/num_lat_total": 2159.0, "debug/num_tok_loss": 1628.0, "debug/num_tok_total": 2159.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.135967254638672, "train/diffusion_loss": 0.4413244128227234 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2277.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2277.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.178704261779785, "train/diffusion_loss": 0.45805883407592773 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1695.0, "debug/num_lat_total": 2213.0, "debug/num_tok_loss": 1695.0, "debug/num_tok_total": 2213.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.163392066955566, "train/diffusion_loss": 0.4596419930458069 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 2197.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 2197.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.184797286987305, "train/diffusion_loss": 0.42093706130981445 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1703.0, "debug/num_lat_total": 2192.0, "debug/num_tok_loss": 1703.0, "debug/num_tok_total": 2192.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.184639930725098, "train/diffusion_loss": 0.40613827109336853 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1821.0, "debug/num_lat_total": 2138.0, "debug/num_tok_loss": 1821.0, "debug/num_tok_total": 2138.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.184473991394043, "train/diffusion_loss": 0.45566800236701965 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2259.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2259.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.151446342468262, "train/diffusion_loss": 0.44823265075683594 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1692.0, "debug/num_lat_total": 2014.0, "debug/num_tok_loss": 1692.0, "debug/num_tok_total": 2014.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.180910110473633, "train/diffusion_loss": 0.4702076315879822 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1776.0, "debug/num_lat_total": 2414.0, "debug/num_tok_loss": 1776.0, "debug/num_tok_total": 2414.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.170491218566895, "train/diffusion_loss": 0.3913854658603668 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1720.0, "debug/num_lat_total": 2192.0, "debug/num_tok_loss": 1720.0, "debug/num_tok_total": 2192.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.153451919555664, "train/diffusion_loss": 0.4243754744529724 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "debug/num_lat_loss": 1739.0, "debug/num_lat_total": 2216.0, "debug/num_tok_loss": 1739.0, "debug/num_tok_total": 2216.0, "epoch": 2.7476635514018692, "step": 19 }, { "epoch": 2.7476635514018692, "step": 19, "train/ce_loss": 12.181028366088867, "train/diffusion_loss": 0.4464891254901886 }, { "epoch": 2.7476635514018692, "step": 19, "train/learning_rate_real": 1.1905226052203222e-05 }, { "epoch": 2.897196261682243, "grad_norm": 0.080078125, "learning_rate": 1.1905226052203222e-05, "loss": 1.1042, "step": 20 }, { "debug/num_lat_loss": 1720.0, "debug/num_lat_total": 1958.0, "debug/num_tok_loss": 1720.0, "debug/num_tok_total": 1958.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.18447494506836, "train/diffusion_loss": 0.4715176522731781 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 2334.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 2334.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.158917427062988, "train/diffusion_loss": 0.4330300986766815 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1704.0, "debug/num_lat_total": 2079.0, "debug/num_tok_loss": 1704.0, "debug/num_tok_total": 2079.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.178497314453125, "train/diffusion_loss": 0.4486521780490875 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1756.0, "debug/num_lat_total": 2360.0, "debug/num_tok_loss": 1756.0, "debug/num_tok_total": 2360.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.15247917175293, "train/diffusion_loss": 0.401742160320282 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1790.0, "debug/num_lat_total": 2236.0, "debug/num_tok_loss": 1790.0, "debug/num_tok_total": 2236.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.17627239227295, "train/diffusion_loss": 0.4607166051864624 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1718.0, "debug/num_lat_total": 2358.0, "debug/num_tok_loss": 1718.0, "debug/num_tok_total": 2358.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.17782974243164, "train/diffusion_loss": 0.41307130455970764 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1711.0, "debug/num_lat_total": 2199.0, "debug/num_tok_loss": 1711.0, "debug/num_tok_total": 2199.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.14721393585205, "train/diffusion_loss": 0.4494905471801758 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1699.0, "debug/num_lat_total": 2313.0, "debug/num_tok_loss": 1699.0, "debug/num_tok_total": 2313.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.170693397521973, "train/diffusion_loss": 0.41251811385154724 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1776.0, "debug/num_lat_total": 2357.0, "debug/num_tok_loss": 1776.0, "debug/num_tok_total": 2357.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.161653518676758, "train/diffusion_loss": 0.42305028438568115 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1811.0, "debug/num_lat_total": 2179.0, "debug/num_tok_loss": 1811.0, "debug/num_tok_total": 2179.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.158014297485352, "train/diffusion_loss": 0.4795665144920349 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 198.0, "debug/num_lat_total": 285.0, "debug/num_tok_loss": 198.0, "debug/num_tok_total": 285.0, "epoch": 2.897196261682243, "step": 20 }, { "epoch": 2.897196261682243, "step": 20, "train/ce_loss": 12.160079956054688, "train/diffusion_loss": 0.43429556488990784 }, { "epoch": 2.897196261682243, "step": 20, "train/learning_rate_real": 1.0721064521583937e-05 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2029.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2029.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.185141563415527, "train/diffusion_loss": 0.47871869802474976 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2217.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2217.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.161572456359863, "train/diffusion_loss": 0.42034080624580383 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1708.0, "debug/num_lat_total": 2155.0, "debug/num_tok_loss": 1708.0, "debug/num_tok_total": 2155.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.176295280456543, "train/diffusion_loss": 0.43198931217193604 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1724.0, "debug/num_lat_total": 2164.0, "debug/num_tok_loss": 1724.0, "debug/num_tok_total": 2164.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.164573669433594, "train/diffusion_loss": 0.49672797322273254 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1785.0, "debug/num_lat_total": 2239.0, "debug/num_tok_loss": 1785.0, "debug/num_tok_total": 2239.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.175557136535645, "train/diffusion_loss": 0.43289849162101746 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2117.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2117.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.159709930419922, "train/diffusion_loss": 0.4435068964958191 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1713.0, "debug/num_lat_total": 1979.0, "debug/num_tok_loss": 1713.0, "debug/num_tok_total": 1979.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.173222541809082, "train/diffusion_loss": 0.4639737010002136 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1692.0, "debug/num_lat_total": 2159.0, "debug/num_tok_loss": 1692.0, "debug/num_tok_total": 2159.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.170083045959473, "train/diffusion_loss": 0.4349198639392853 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2184.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2184.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.192635536193848, "train/diffusion_loss": 0.4608837068080902 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1773.0, "debug/num_lat_total": 2210.0, "debug/num_tok_loss": 1773.0, "debug/num_tok_total": 2210.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.185585975646973, "train/diffusion_loss": 0.44979315996170044 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1739.0, "debug/num_lat_total": 2259.0, "debug/num_tok_loss": 1739.0, "debug/num_tok_total": 2259.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.171687126159668, "train/diffusion_loss": 0.4464608430862427 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1703.0, "debug/num_lat_total": 2137.0, "debug/num_tok_loss": 1703.0, "debug/num_tok_total": 2137.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.194785118103027, "train/diffusion_loss": 0.4453159272670746 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2147.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2147.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.135725021362305, "train/diffusion_loss": 0.4403951168060303 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1685.0, "debug/num_lat_total": 2181.0, "debug/num_tok_loss": 1685.0, "debug/num_tok_total": 2181.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.180353164672852, "train/diffusion_loss": 0.4038219153881073 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1712.0, "debug/num_lat_total": 2147.0, "debug/num_tok_loss": 1712.0, "debug/num_tok_total": 2147.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.182561874389648, "train/diffusion_loss": 0.44825613498687744 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1772.0, "debug/num_lat_total": 2275.0, "debug/num_tok_loss": 1772.0, "debug/num_tok_total": 2275.0, "epoch": 3.0, "step": 21 }, { "epoch": 3.0, "step": 21, "train/ce_loss": 12.186759948730469, "train/diffusion_loss": 0.42617136240005493 }, { "epoch": 3.0, "step": 21, "train/learning_rate_real": 9.553013306132158e-06 }, { "debug/num_lat_loss": 1688.0, "debug/num_lat_total": 1989.0, "debug/num_tok_loss": 1688.0, "debug/num_tok_total": 1989.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.166876792907715, "train/diffusion_loss": 0.4436281621456146 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1663.0, "debug/num_lat_total": 2146.0, "debug/num_tok_loss": 1663.0, "debug/num_tok_total": 2146.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.181292533874512, "train/diffusion_loss": 0.43562567234039307 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2199.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2199.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.206137657165527, "train/diffusion_loss": 0.4738239347934723 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1697.0, "debug/num_lat_total": 2169.0, "debug/num_tok_loss": 1697.0, "debug/num_tok_total": 2169.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.14698314666748, "train/diffusion_loss": 0.41713082790374756 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1868.0, "debug/num_lat_total": 2287.0, "debug/num_tok_loss": 1868.0, "debug/num_tok_total": 2287.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.143259048461914, "train/diffusion_loss": 0.4566551744937897 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1811.0, "debug/num_lat_total": 2433.0, "debug/num_tok_loss": 1811.0, "debug/num_tok_total": 2433.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.159565925598145, "train/diffusion_loss": 0.41677698493003845 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1716.0, "debug/num_lat_total": 2281.0, "debug/num_tok_loss": 1716.0, "debug/num_tok_total": 2281.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.1969575881958, "train/diffusion_loss": 0.45901116728782654 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2128.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2128.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.142516136169434, "train/diffusion_loss": 0.4245382249355316 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2290.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2290.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.163461685180664, "train/diffusion_loss": 0.41974490880966187 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1673.0, "debug/num_lat_total": 2143.0, "debug/num_tok_loss": 1673.0, "debug/num_tok_total": 2143.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.204856872558594, "train/diffusion_loss": 0.4260376989841461 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1832.0, "debug/num_lat_total": 2448.0, "debug/num_tok_loss": 1832.0, "debug/num_tok_total": 2448.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.169397354125977, "train/diffusion_loss": 0.444273442029953 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2292.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2292.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.207819938659668, "train/diffusion_loss": 0.42534464597702026 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1724.0, "debug/num_lat_total": 2197.0, "debug/num_tok_loss": 1724.0, "debug/num_tok_total": 2197.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.198328971862793, "train/diffusion_loss": 0.42757922410964966 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1890.0, "debug/num_lat_total": 2202.0, "debug/num_tok_loss": 1890.0, "debug/num_tok_total": 2202.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.175276756286621, "train/diffusion_loss": 0.44557538628578186 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1775.0, "debug/num_lat_total": 2346.0, "debug/num_tok_loss": 1775.0, "debug/num_tok_total": 2346.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.125227928161621, "train/diffusion_loss": 0.4252835214138031 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1765.0, "debug/num_lat_total": 2221.0, "debug/num_tok_loss": 1765.0, "debug/num_tok_total": 2221.0, "epoch": 3.149532710280374, "step": 22 }, { "epoch": 3.149532710280374, "step": 22, "train/ce_loss": 12.168408393859863, "train/diffusion_loss": 0.4143877625465393 }, { "epoch": 3.149532710280374, "step": 22, "train/learning_rate_real": 8.41165045853223e-06 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2312.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2312.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.194767951965332, "train/diffusion_loss": 0.40749505162239075 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1748.0, "debug/num_lat_total": 2337.0, "debug/num_tok_loss": 1748.0, "debug/num_tok_total": 2337.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.180062294006348, "train/diffusion_loss": 0.45502886176109314 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1764.0, "debug/num_lat_total": 2187.0, "debug/num_tok_loss": 1764.0, "debug/num_tok_total": 2187.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.19076919555664, "train/diffusion_loss": 0.4535031318664551 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2437.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2437.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.163100242614746, "train/diffusion_loss": 0.4055667221546173 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 2215.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 2215.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.180434226989746, "train/diffusion_loss": 0.42948290705680847 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2159.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2159.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.177447319030762, "train/diffusion_loss": 0.4368229806423187 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1677.0, "debug/num_lat_total": 1909.0, "debug/num_tok_loss": 1677.0, "debug/num_tok_total": 1909.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.212713241577148, "train/diffusion_loss": 0.4799862802028656 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1807.0, "debug/num_lat_total": 2175.0, "debug/num_tok_loss": 1807.0, "debug/num_tok_total": 2175.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.173916816711426, "train/diffusion_loss": 0.47795355319976807 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2179.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2179.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.16403865814209, "train/diffusion_loss": 0.43208783864974976 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1737.0, "debug/num_lat_total": 2147.0, "debug/num_tok_loss": 1737.0, "debug/num_tok_total": 2147.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.195341110229492, "train/diffusion_loss": 0.42608267068862915 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1694.0, "debug/num_lat_total": 2095.0, "debug/num_tok_loss": 1694.0, "debug/num_tok_total": 2095.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.224993705749512, "train/diffusion_loss": 0.4705503582954407 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2151.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2151.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.196223258972168, "train/diffusion_loss": 0.44091811776161194 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1663.0, "debug/num_lat_total": 2059.0, "debug/num_tok_loss": 1663.0, "debug/num_tok_total": 2059.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.171528816223145, "train/diffusion_loss": 0.47177937626838684 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1700.0, "debug/num_lat_total": 2117.0, "debug/num_tok_loss": 1700.0, "debug/num_tok_total": 2117.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.132240295410156, "train/diffusion_loss": 0.4833270311355591 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1800.0, "debug/num_lat_total": 2179.0, "debug/num_tok_loss": 1800.0, "debug/num_tok_total": 2179.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.171632766723633, "train/diffusion_loss": 0.4560210704803467 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1743.0, "debug/num_lat_total": 2176.0, "debug/num_tok_loss": 1743.0, "debug/num_tok_total": 2176.0, "epoch": 3.2990654205607477, "step": 23 }, { "epoch": 3.2990654205607477, "step": 23, "train/ce_loss": 12.158903121948242, "train/diffusion_loss": 0.43445152044296265 }, { "epoch": 3.2990654205607477, "step": 23, "train/learning_rate_real": 7.307312337476421e-06 }, { "debug/num_lat_loss": 1815.0, "debug/num_lat_total": 2252.0, "debug/num_tok_loss": 1815.0, "debug/num_tok_total": 2252.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.207298278808594, "train/diffusion_loss": 0.4564967453479767 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1737.0, "debug/num_lat_total": 2213.0, "debug/num_tok_loss": 1737.0, "debug/num_tok_total": 2213.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.177847862243652, "train/diffusion_loss": 0.4450188875198364 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1676.0, "debug/num_lat_total": 2186.0, "debug/num_tok_loss": 1676.0, "debug/num_tok_total": 2186.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.1792631149292, "train/diffusion_loss": 0.4131785035133362 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1822.0, "debug/num_lat_total": 2243.0, "debug/num_tok_loss": 1822.0, "debug/num_tok_total": 2243.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.178991317749023, "train/diffusion_loss": 0.46926724910736084 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1668.0, "debug/num_lat_total": 2087.0, "debug/num_tok_loss": 1668.0, "debug/num_tok_total": 2087.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.16328239440918, "train/diffusion_loss": 0.46612855792045593 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1724.0, "debug/num_lat_total": 2304.0, "debug/num_tok_loss": 1724.0, "debug/num_tok_total": 2304.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.163469314575195, "train/diffusion_loss": 0.40386247634887695 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1803.0, "debug/num_lat_total": 2119.0, "debug/num_tok_loss": 1803.0, "debug/num_tok_total": 2119.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.176862716674805, "train/diffusion_loss": 0.4733399748802185 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1701.0, "debug/num_lat_total": 2097.0, "debug/num_tok_loss": 1701.0, "debug/num_tok_total": 2097.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.180752754211426, "train/diffusion_loss": 0.4555293917655945 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2041.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2041.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.19352912902832, "train/diffusion_loss": 0.4499252736568451 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1749.0, "debug/num_lat_total": 2035.0, "debug/num_tok_loss": 1749.0, "debug/num_tok_total": 2035.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.191561698913574, "train/diffusion_loss": 0.438742995262146 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1681.0, "debug/num_lat_total": 2014.0, "debug/num_tok_loss": 1681.0, "debug/num_tok_total": 2014.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.201146125793457, "train/diffusion_loss": 0.4450303614139557 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1753.0, "debug/num_lat_total": 2150.0, "debug/num_tok_loss": 1753.0, "debug/num_tok_total": 2150.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.183883666992188, "train/diffusion_loss": 0.43293264508247375 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2146.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2146.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.142557144165039, "train/diffusion_loss": 0.43829670548439026 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1722.0, "debug/num_lat_total": 2148.0, "debug/num_tok_loss": 1722.0, "debug/num_tok_total": 2148.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.163311004638672, "train/diffusion_loss": 0.45310938358306885 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1698.0, "debug/num_lat_total": 2301.0, "debug/num_tok_loss": 1698.0, "debug/num_tok_total": 2301.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.16663932800293, "train/diffusion_loss": 0.3920009434223175 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1745.0, "debug/num_lat_total": 2103.0, "debug/num_tok_loss": 1745.0, "debug/num_tok_total": 2103.0, "epoch": 3.4485981308411215, "step": 24 }, { "epoch": 3.4485981308411215, "step": 24, "train/ce_loss": 12.189055442810059, "train/diffusion_loss": 0.45429617166519165 }, { "epoch": 3.4485981308411215, "step": 24, "train/learning_rate_real": 6.250000000000003e-06 }, { "debug/num_lat_loss": 1793.0, "debug/num_lat_total": 2362.0, "debug/num_tok_loss": 1793.0, "debug/num_tok_total": 2362.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.192378044128418, "train/diffusion_loss": 0.4308478832244873 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1769.0, "debug/num_lat_total": 2132.0, "debug/num_tok_loss": 1769.0, "debug/num_tok_total": 2132.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.206249237060547, "train/diffusion_loss": 0.476319283246994 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1816.0, "debug/num_lat_total": 2395.0, "debug/num_tok_loss": 1816.0, "debug/num_tok_total": 2395.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.17150592803955, "train/diffusion_loss": 0.41233769059181213 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1775.0, "debug/num_lat_total": 2214.0, "debug/num_tok_loss": 1775.0, "debug/num_tok_total": 2214.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.180362701416016, "train/diffusion_loss": 0.45229336619377136 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1725.0, "debug/num_lat_total": 2213.0, "debug/num_tok_loss": 1725.0, "debug/num_tok_total": 2213.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.19162654876709, "train/diffusion_loss": 0.41457587480545044 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1696.0, "debug/num_lat_total": 2192.0, "debug/num_tok_loss": 1696.0, "debug/num_tok_total": 2192.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.148582458496094, "train/diffusion_loss": 0.4307917654514313 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1758.0, "debug/num_lat_total": 2080.0, "debug/num_tok_loss": 1758.0, "debug/num_tok_total": 2080.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.170329093933105, "train/diffusion_loss": 0.4580996632575989 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1674.0, "debug/num_lat_total": 2183.0, "debug/num_tok_loss": 1674.0, "debug/num_tok_total": 2183.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.19109058380127, "train/diffusion_loss": 0.42841607332229614 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1698.0, "debug/num_lat_total": 2182.0, "debug/num_tok_loss": 1698.0, "debug/num_tok_total": 2182.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.186971664428711, "train/diffusion_loss": 0.42583712935447693 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1726.0, "debug/num_lat_total": 2186.0, "debug/num_tok_loss": 1726.0, "debug/num_tok_total": 2186.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.18460750579834, "train/diffusion_loss": 0.4379256069660187 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1706.0, "debug/num_lat_total": 2120.0, "debug/num_tok_loss": 1706.0, "debug/num_tok_total": 2120.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.156734466552734, "train/diffusion_loss": 0.42288535833358765 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2324.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2324.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.204289436340332, "train/diffusion_loss": 0.41219431161880493 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1789.0, "debug/num_lat_total": 2248.0, "debug/num_tok_loss": 1789.0, "debug/num_tok_total": 2248.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.165375709533691, "train/diffusion_loss": 0.4418776035308838 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1701.0, "debug/num_lat_total": 2123.0, "debug/num_tok_loss": 1701.0, "debug/num_tok_total": 2123.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.188913345336914, "train/diffusion_loss": 0.4265272319316864 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1703.0, "debug/num_lat_total": 2134.0, "debug/num_tok_loss": 1703.0, "debug/num_tok_total": 2134.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.143837928771973, "train/diffusion_loss": 0.4750172793865204 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1757.0, "debug/num_lat_total": 2405.0, "debug/num_tok_loss": 1757.0, "debug/num_tok_total": 2405.0, "epoch": 3.5981308411214954, "step": 25 }, { "epoch": 3.5981308411214954, "step": 25, "train/ce_loss": 12.149174690246582, "train/diffusion_loss": 0.43947383761405945 }, { "epoch": 3.5981308411214954, "step": 25, "train/learning_rate_real": 5.249288630360025e-06 }, { "debug/num_lat_loss": 1717.0, "debug/num_lat_total": 2117.0, "debug/num_tok_loss": 1717.0, "debug/num_tok_total": 2117.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.182785987854004, "train/diffusion_loss": 0.43974003195762634 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1714.0, "debug/num_lat_total": 2123.0, "debug/num_tok_loss": 1714.0, "debug/num_tok_total": 2123.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.178577423095703, "train/diffusion_loss": 0.4370800256729126 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1674.0, "debug/num_lat_total": 2086.0, "debug/num_tok_loss": 1674.0, "debug/num_tok_total": 2086.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.184208869934082, "train/diffusion_loss": 0.4335319697856903 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1707.0, "debug/num_lat_total": 2047.0, "debug/num_tok_loss": 1707.0, "debug/num_tok_total": 2047.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.191585540771484, "train/diffusion_loss": 0.4503075182437897 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1789.0, "debug/num_lat_total": 2288.0, "debug/num_tok_loss": 1789.0, "debug/num_tok_total": 2288.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.189672470092773, "train/diffusion_loss": 0.4307839572429657 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1781.0, "debug/num_lat_total": 2118.0, "debug/num_tok_loss": 1781.0, "debug/num_tok_total": 2118.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.171289443969727, "train/diffusion_loss": 0.45949694514274597 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1749.0, "debug/num_lat_total": 2243.0, "debug/num_tok_loss": 1749.0, "debug/num_tok_total": 2243.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.190430641174316, "train/diffusion_loss": 0.4002898335456848 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 1911.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 1911.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.152243614196777, "train/diffusion_loss": 0.48624396324157715 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1709.0, "debug/num_lat_total": 2202.0, "debug/num_tok_loss": 1709.0, "debug/num_tok_total": 2202.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.171443939208984, "train/diffusion_loss": 0.43149277567863464 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1801.0, "debug/num_lat_total": 2253.0, "debug/num_tok_loss": 1801.0, "debug/num_tok_total": 2253.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.186466217041016, "train/diffusion_loss": 0.4318007826805115 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1727.0, "debug/num_lat_total": 2297.0, "debug/num_tok_loss": 1727.0, "debug/num_tok_total": 2297.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.161050796508789, "train/diffusion_loss": 0.39592570066452026 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2203.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2203.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.172682762145996, "train/diffusion_loss": 0.4314633011817932 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1729.0, "debug/num_lat_total": 2218.0, "debug/num_tok_loss": 1729.0, "debug/num_tok_total": 2218.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.163350105285645, "train/diffusion_loss": 0.4142179489135742 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1807.0, "debug/num_lat_total": 2361.0, "debug/num_tok_loss": 1807.0, "debug/num_tok_total": 2361.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.161543846130371, "train/diffusion_loss": 0.43518614768981934 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1745.0, "debug/num_lat_total": 2152.0, "debug/num_tok_loss": 1745.0, "debug/num_tok_total": 2152.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.140242576599121, "train/diffusion_loss": 0.43826156854629517 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1799.0, "debug/num_lat_total": 2113.0, "debug/num_tok_loss": 1799.0, "debug/num_tok_total": 2113.0, "epoch": 3.7476635514018692, "step": 26 }, { "epoch": 3.7476635514018692, "step": 26, "train/ce_loss": 12.205531120300293, "train/diffusion_loss": 0.4470859467983246 }, { "epoch": 3.7476635514018692, "step": 26, "train/learning_rate_real": 4.314240825683938e-06 }, { "debug/num_lat_loss": 1771.0, "debug/num_lat_total": 2168.0, "debug/num_tok_loss": 1771.0, "debug/num_tok_total": 2168.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.175880432128906, "train/diffusion_loss": 0.4578666090965271 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2183.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2183.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.155753135681152, "train/diffusion_loss": 0.4481011629104614 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2105.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2105.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.197052955627441, "train/diffusion_loss": 0.43395644426345825 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1786.0, "debug/num_lat_total": 2220.0, "debug/num_tok_loss": 1786.0, "debug/num_tok_total": 2220.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.178627014160156, "train/diffusion_loss": 0.44644054770469666 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1617.0, "debug/num_lat_total": 1821.0, "debug/num_tok_loss": 1617.0, "debug/num_tok_total": 1821.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.175630569458008, "train/diffusion_loss": 0.45170465111732483 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1779.0, "debug/num_lat_total": 2263.0, "debug/num_tok_loss": 1779.0, "debug/num_tok_total": 2263.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.189778327941895, "train/diffusion_loss": 0.4389124810695648 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1782.0, "debug/num_lat_total": 2202.0, "debug/num_tok_loss": 1782.0, "debug/num_tok_total": 2202.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.168231964111328, "train/diffusion_loss": 0.4539552628993988 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1722.0, "debug/num_lat_total": 2049.0, "debug/num_tok_loss": 1722.0, "debug/num_tok_total": 2049.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.237095832824707, "train/diffusion_loss": 0.45853713154792786 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1898.0, "debug/num_lat_total": 2291.0, "debug/num_tok_loss": 1898.0, "debug/num_tok_total": 2291.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.171428680419922, "train/diffusion_loss": 0.44604358077049255 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1667.0, "debug/num_lat_total": 2193.0, "debug/num_tok_loss": 1667.0, "debug/num_tok_total": 2193.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.138124465942383, "train/diffusion_loss": 0.4041489362716675 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 210.0, "debug/num_lat_total": 260.0, "debug/num_tok_loss": 210.0, "debug/num_tok_total": 260.0, "epoch": 3.897196261682243, "step": 27 }, { "epoch": 3.897196261682243, "step": 27, "train/ce_loss": 12.142877578735352, "train/diffusion_loss": 0.5250693559646606 }, { "epoch": 3.897196261682243, "step": 27, "train/learning_rate_real": 3.453324523686623e-06 }, { "debug/num_lat_loss": 1679.0, "debug/num_lat_total": 2145.0, "debug/num_tok_loss": 1679.0, "debug/num_tok_total": 2145.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.175436019897461, "train/diffusion_loss": 0.41446301341056824 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1853.0, "debug/num_lat_total": 2411.0, "debug/num_tok_loss": 1853.0, "debug/num_tok_total": 2411.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.186768531799316, "train/diffusion_loss": 0.41826552152633667 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1724.0, "debug/num_lat_total": 2208.0, "debug/num_tok_loss": 1724.0, "debug/num_tok_total": 2208.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.172440528869629, "train/diffusion_loss": 0.4254646599292755 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1725.0, "debug/num_lat_total": 2244.0, "debug/num_tok_loss": 1725.0, "debug/num_tok_total": 2244.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.202057838439941, "train/diffusion_loss": 0.4286242425441742 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1759.0, "debug/num_lat_total": 2305.0, "debug/num_tok_loss": 1759.0, "debug/num_tok_total": 2305.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.16329288482666, "train/diffusion_loss": 0.4380427598953247 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1662.0, "debug/num_lat_total": 2193.0, "debug/num_tok_loss": 1662.0, "debug/num_tok_total": 2193.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.1865816116333, "train/diffusion_loss": 0.4253368377685547 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1799.0, "debug/num_lat_total": 2314.0, "debug/num_tok_loss": 1799.0, "debug/num_tok_total": 2314.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.162942886352539, "train/diffusion_loss": 0.4381959140300751 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1733.0, "debug/num_lat_total": 2279.0, "debug/num_tok_loss": 1733.0, "debug/num_tok_total": 2279.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.188685417175293, "train/diffusion_loss": 0.4371460974216461 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1711.0, "debug/num_lat_total": 2178.0, "debug/num_tok_loss": 1711.0, "debug/num_tok_total": 2178.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.155284881591797, "train/diffusion_loss": 0.464201956987381 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1752.0, "debug/num_lat_total": 2227.0, "debug/num_tok_loss": 1752.0, "debug/num_tok_total": 2227.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.149553298950195, "train/diffusion_loss": 0.4439617693424225 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1704.0, "debug/num_lat_total": 2086.0, "debug/num_tok_loss": 1704.0, "debug/num_tok_total": 2086.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.204044342041016, "train/diffusion_loss": 0.44241979718208313 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1713.0, "debug/num_lat_total": 2119.0, "debug/num_tok_loss": 1713.0, "debug/num_tok_total": 2119.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.202098846435547, "train/diffusion_loss": 0.44500264525413513 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1720.0, "debug/num_lat_total": 2300.0, "debug/num_tok_loss": 1720.0, "debug/num_tok_total": 2300.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.154370307922363, "train/diffusion_loss": 0.4311031103134155 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1731.0, "debug/num_lat_total": 2194.0, "debug/num_tok_loss": 1731.0, "debug/num_tok_total": 2194.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.157872200012207, "train/diffusion_loss": 0.44460704922676086 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1806.0, "debug/num_lat_total": 2261.0, "debug/num_tok_loss": 1806.0, "debug/num_tok_total": 2261.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.186095237731934, "train/diffusion_loss": 0.4375755488872528 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2022.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2022.0, "epoch": 4.0, "step": 28 }, { "epoch": 4.0, "step": 28, "train/ce_loss": 12.171619415283203, "train/diffusion_loss": 0.4596668481826782 }, { "epoch": 4.0, "step": 28, "train/learning_rate_real": 2.674336315715159e-06 }, { "debug/num_lat_loss": 1674.0, "debug/num_lat_total": 2120.0, "debug/num_tok_loss": 1674.0, "debug/num_tok_total": 2120.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.189408302307129, "train/diffusion_loss": 0.4574336111545563 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1790.0, "debug/num_lat_total": 2237.0, "debug/num_tok_loss": 1790.0, "debug/num_tok_total": 2237.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.166314125061035, "train/diffusion_loss": 0.4323076903820038 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1812.0, "debug/num_lat_total": 2238.0, "debug/num_tok_loss": 1812.0, "debug/num_tok_total": 2238.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.176814079284668, "train/diffusion_loss": 0.42043188214302063 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1780.0, "debug/num_lat_total": 2107.0, "debug/num_tok_loss": 1780.0, "debug/num_tok_total": 2107.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.157387733459473, "train/diffusion_loss": 0.4889250695705414 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1831.0, "debug/num_lat_total": 2428.0, "debug/num_tok_loss": 1831.0, "debug/num_tok_total": 2428.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.166900634765625, "train/diffusion_loss": 0.4233861565589905 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1804.0, "debug/num_lat_total": 2144.0, "debug/num_tok_loss": 1804.0, "debug/num_tok_total": 2144.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.19821834564209, "train/diffusion_loss": 0.4677843153476715 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1752.0, "debug/num_lat_total": 2197.0, "debug/num_tok_loss": 1752.0, "debug/num_tok_total": 2197.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.170734405517578, "train/diffusion_loss": 0.4539377987384796 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1783.0, "debug/num_lat_total": 2446.0, "debug/num_tok_loss": 1783.0, "debug/num_tok_total": 2446.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.198660850524902, "train/diffusion_loss": 0.40065276622772217 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1713.0, "debug/num_lat_total": 2128.0, "debug/num_tok_loss": 1713.0, "debug/num_tok_total": 2128.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.169668197631836, "train/diffusion_loss": 0.4404948949813843 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1655.0, "debug/num_lat_total": 2195.0, "debug/num_tok_loss": 1655.0, "debug/num_tok_total": 2195.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.177712440490723, "train/diffusion_loss": 0.3918401896953583 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1730.0, "debug/num_lat_total": 2218.0, "debug/num_tok_loss": 1730.0, "debug/num_tok_total": 2218.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.167113304138184, "train/diffusion_loss": 0.4331023395061493 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1738.0, "debug/num_lat_total": 2119.0, "debug/num_tok_loss": 1738.0, "debug/num_tok_total": 2119.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.170269966125488, "train/diffusion_loss": 0.43736377358436584 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1736.0, "debug/num_lat_total": 2199.0, "debug/num_tok_loss": 1736.0, "debug/num_tok_total": 2199.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.155172348022461, "train/diffusion_loss": 0.42950916290283203 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1665.0, "debug/num_lat_total": 2159.0, "debug/num_tok_loss": 1665.0, "debug/num_tok_total": 2159.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.158082962036133, "train/diffusion_loss": 0.4130508303642273 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1736.0, "debug/num_lat_total": 2237.0, "debug/num_tok_loss": 1736.0, "debug/num_tok_total": 2237.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.169498443603516, "train/diffusion_loss": 0.453546404838562 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "debug/num_lat_loss": 1689.0, "debug/num_lat_total": 2065.0, "debug/num_tok_loss": 1689.0, "debug/num_tok_total": 2065.0, "epoch": 4.149532710280374, "step": 29 }, { "epoch": 4.149532710280374, "step": 29, "train/ce_loss": 12.140535354614258, "train/diffusion_loss": 0.46471476554870605 }, { "epoch": 4.149532710280374, "step": 29, "train/learning_rate_real": 1.984330839610234e-06 }, { "epoch": 4.299065420560748, "grad_norm": 0.07275390625, "learning_rate": 1.984330839610234e-06, "loss": 1.1041, "step": 30 }, { "debug/num_lat_loss": 1752.0, "debug/num_lat_total": 2173.0, "debug/num_tok_loss": 1752.0, "debug/num_tok_total": 2173.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.145671844482422, "train/diffusion_loss": 0.4525708556175232 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1755.0, "debug/num_lat_total": 1993.0, "debug/num_tok_loss": 1755.0, "debug/num_tok_total": 1993.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.190377235412598, "train/diffusion_loss": 0.46880972385406494 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2238.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2238.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.153491020202637, "train/diffusion_loss": 0.44733402132987976 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1749.0, "debug/num_lat_total": 2220.0, "debug/num_tok_loss": 1749.0, "debug/num_tok_total": 2220.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.16679859161377, "train/diffusion_loss": 0.4347849488258362 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2243.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2243.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.193336486816406, "train/diffusion_loss": 0.42132002115249634 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1671.0, "debug/num_lat_total": 2218.0, "debug/num_tok_loss": 1671.0, "debug/num_tok_total": 2218.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.164517402648926, "train/diffusion_loss": 0.41408732533454895 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1732.0, "debug/num_lat_total": 2149.0, "debug/num_tok_loss": 1732.0, "debug/num_tok_total": 2149.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.206441879272461, "train/diffusion_loss": 0.4473426342010498 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2352.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2352.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.163651466369629, "train/diffusion_loss": 0.39565032720565796 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1795.0, "debug/num_lat_total": 2295.0, "debug/num_tok_loss": 1795.0, "debug/num_tok_total": 2295.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.172779083251953, "train/diffusion_loss": 0.4185372292995453 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1687.0, "debug/num_lat_total": 2142.0, "debug/num_tok_loss": 1687.0, "debug/num_tok_total": 2142.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.184844017028809, "train/diffusion_loss": 0.4559231698513031 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1717.0, "debug/num_lat_total": 2221.0, "debug/num_tok_loss": 1717.0, "debug/num_tok_total": 2221.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.17270565032959, "train/diffusion_loss": 0.44331198930740356 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1717.0, "debug/num_lat_total": 2179.0, "debug/num_tok_loss": 1717.0, "debug/num_tok_total": 2179.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.174978256225586, "train/diffusion_loss": 0.42323049902915955 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1840.0, "debug/num_lat_total": 2161.0, "debug/num_tok_loss": 1840.0, "debug/num_tok_total": 2161.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.170124053955078, "train/diffusion_loss": 0.48068806529045105 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1775.0, "debug/num_lat_total": 2192.0, "debug/num_tok_loss": 1775.0, "debug/num_tok_total": 2192.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.172566413879395, "train/diffusion_loss": 0.4287210702896118 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1728.0, "debug/num_lat_total": 2038.0, "debug/num_tok_loss": 1728.0, "debug/num_tok_total": 2038.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.167974472045898, "train/diffusion_loss": 0.44199228286743164 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1700.0, "debug/num_lat_total": 2257.0, "debug/num_tok_loss": 1700.0, "debug/num_tok_total": 2257.0, "epoch": 4.299065420560748, "step": 30 }, { "epoch": 4.299065420560748, "step": 30, "train/ce_loss": 12.186758995056152, "train/diffusion_loss": 0.4000075161457062 }, { "epoch": 4.299065420560748, "step": 30, "train/learning_rate_real": 1.389556891813458e-06 }, { "debug/num_lat_loss": 1806.0, "debug/num_lat_total": 2203.0, "debug/num_tok_loss": 1806.0, "debug/num_tok_total": 2203.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.169504165649414, "train/diffusion_loss": 0.47231945395469666 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1767.0, "debug/num_lat_total": 2238.0, "debug/num_tok_loss": 1767.0, "debug/num_tok_total": 2238.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.156598091125488, "train/diffusion_loss": 0.4360898733139038 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1719.0, "debug/num_lat_total": 2105.0, "debug/num_tok_loss": 1719.0, "debug/num_tok_total": 2105.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.160431861877441, "train/diffusion_loss": 0.43545302748680115 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1952.0, "debug/num_lat_total": 2575.0, "debug/num_tok_loss": 1952.0, "debug/num_tok_total": 2575.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.136270523071289, "train/diffusion_loss": 0.4197411835193634 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1746.0, "debug/num_lat_total": 2267.0, "debug/num_tok_loss": 1746.0, "debug/num_tok_total": 2267.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.166632652282715, "train/diffusion_loss": 0.466814786195755 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2326.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2326.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.176079750061035, "train/diffusion_loss": 0.40206533670425415 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2229.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2229.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.191839218139648, "train/diffusion_loss": 0.41346275806427 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1785.0, "debug/num_lat_total": 2381.0, "debug/num_tok_loss": 1785.0, "debug/num_tok_total": 2381.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.170215606689453, "train/diffusion_loss": 0.4004969894886017 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1740.0, "debug/num_lat_total": 2062.0, "debug/num_tok_loss": 1740.0, "debug/num_tok_total": 2062.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.164392471313477, "train/diffusion_loss": 0.46367114782333374 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1706.0, "debug/num_lat_total": 2305.0, "debug/num_tok_loss": 1706.0, "debug/num_tok_total": 2305.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.16750717163086, "train/diffusion_loss": 0.4109986424446106 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1668.0, "debug/num_lat_total": 2229.0, "debug/num_tok_loss": 1668.0, "debug/num_tok_total": 2229.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.20853328704834, "train/diffusion_loss": 0.4263615310192108 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1804.0, "debug/num_lat_total": 2314.0, "debug/num_tok_loss": 1804.0, "debug/num_tok_total": 2314.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.14916706085205, "train/diffusion_loss": 0.4520219564437866 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1679.0, "debug/num_lat_total": 2148.0, "debug/num_tok_loss": 1679.0, "debug/num_tok_total": 2148.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.153385162353516, "train/diffusion_loss": 0.42382287979125977 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1724.0, "debug/num_lat_total": 2220.0, "debug/num_tok_loss": 1724.0, "debug/num_tok_total": 2220.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.130288124084473, "train/diffusion_loss": 0.4364231526851654 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2283.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2283.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.175270080566406, "train/diffusion_loss": 0.4409050941467285 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1835.0, "debug/num_lat_total": 2207.0, "debug/num_tok_loss": 1835.0, "debug/num_tok_total": 2207.0, "epoch": 4.4485981308411215, "step": 31 }, { "epoch": 4.4485981308411215, "step": 31, "train/ce_loss": 12.18620491027832, "train/diffusion_loss": 0.44913163781166077 }, { "epoch": 4.4485981308411215, "step": 31, "train/learning_rate_real": 8.95400837299093e-07 }, { "debug/num_lat_loss": 1719.0, "debug/num_lat_total": 2323.0, "debug/num_tok_loss": 1719.0, "debug/num_tok_total": 2323.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.183679580688477, "train/diffusion_loss": 0.41472896933555603 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1646.0, "debug/num_lat_total": 2072.0, "debug/num_tok_loss": 1646.0, "debug/num_tok_total": 2072.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.173807144165039, "train/diffusion_loss": 0.4390676021575928 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2427.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2427.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.151962280273438, "train/diffusion_loss": 0.41235706210136414 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1758.0, "debug/num_lat_total": 2354.0, "debug/num_tok_loss": 1758.0, "debug/num_tok_total": 2354.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.149380683898926, "train/diffusion_loss": 0.4324599802494049 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1862.0, "debug/num_lat_total": 2345.0, "debug/num_tok_loss": 1862.0, "debug/num_tok_total": 2345.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.148853302001953, "train/diffusion_loss": 0.420383483171463 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1795.0, "debug/num_lat_total": 2269.0, "debug/num_tok_loss": 1795.0, "debug/num_tok_total": 2269.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.170392036437988, "train/diffusion_loss": 0.4294658899307251 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1760.0, "debug/num_lat_total": 2156.0, "debug/num_tok_loss": 1760.0, "debug/num_tok_total": 2156.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.155299186706543, "train/diffusion_loss": 0.44193097949028015 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2170.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2170.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.195367813110352, "train/diffusion_loss": 0.44668319821357727 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1744.0, "debug/num_lat_total": 2061.0, "debug/num_tok_loss": 1744.0, "debug/num_tok_total": 2061.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.192215919494629, "train/diffusion_loss": 0.4522995352745056 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1809.0, "debug/num_lat_total": 2322.0, "debug/num_tok_loss": 1809.0, "debug/num_tok_total": 2322.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.157405853271484, "train/diffusion_loss": 0.416936457157135 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1643.0, "debug/num_lat_total": 2054.0, "debug/num_tok_loss": 1643.0, "debug/num_tok_total": 2054.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.180978775024414, "train/diffusion_loss": 0.42913898825645447 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1742.0, "debug/num_lat_total": 2406.0, "debug/num_tok_loss": 1742.0, "debug/num_tok_total": 2406.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.150097846984863, "train/diffusion_loss": 0.40735092759132385 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1646.0, "debug/num_lat_total": 1997.0, "debug/num_tok_loss": 1646.0, "debug/num_tok_total": 1997.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.196208000183105, "train/diffusion_loss": 0.43271905183792114 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1702.0, "debug/num_lat_total": 2094.0, "debug/num_tok_loss": 1702.0, "debug/num_tok_total": 2094.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.162835121154785, "train/diffusion_loss": 0.4518500566482544 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1741.0, "debug/num_lat_total": 2201.0, "debug/num_tok_loss": 1741.0, "debug/num_tok_total": 2201.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.16962718963623, "train/diffusion_loss": 0.442145437002182 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1756.0, "debug/num_lat_total": 2102.0, "debug/num_tok_loss": 1756.0, "debug/num_tok_total": 2102.0, "epoch": 4.598130841121495, "step": 32 }, { "epoch": 4.598130841121495, "step": 32, "train/ce_loss": 12.188155174255371, "train/diffusion_loss": 0.4411594271659851 }, { "epoch": 4.598130841121495, "step": 32, "train/learning_rate_real": 5.063378298187843e-07 }, { "debug/num_lat_loss": 1730.0, "debug/num_lat_total": 2254.0, "debug/num_tok_loss": 1730.0, "debug/num_tok_total": 2254.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.188690185546875, "train/diffusion_loss": 0.44849783182144165 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2360.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2360.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.205450057983398, "train/diffusion_loss": 0.400063157081604 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1678.0, "debug/num_lat_total": 2055.0, "debug/num_tok_loss": 1678.0, "debug/num_tok_total": 2055.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.187387466430664, "train/diffusion_loss": 0.46367162466049194 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1666.0, "debug/num_lat_total": 2027.0, "debug/num_tok_loss": 1666.0, "debug/num_tok_total": 2027.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.189556121826172, "train/diffusion_loss": 0.5164213180541992 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1640.0, "debug/num_lat_total": 2208.0, "debug/num_tok_loss": 1640.0, "debug/num_tok_total": 2208.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.173091888427734, "train/diffusion_loss": 0.41635480523109436 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1715.0, "debug/num_lat_total": 2113.0, "debug/num_tok_loss": 1715.0, "debug/num_tok_total": 2113.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.206910133361816, "train/diffusion_loss": 0.4815617799758911 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1737.0, "debug/num_lat_total": 2221.0, "debug/num_tok_loss": 1737.0, "debug/num_tok_total": 2221.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.145868301391602, "train/diffusion_loss": 0.4430966377258301 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1707.0, "debug/num_lat_total": 2296.0, "debug/num_tok_loss": 1707.0, "debug/num_tok_total": 2296.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.134885787963867, "train/diffusion_loss": 0.4013361930847168 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1736.0, "debug/num_lat_total": 2219.0, "debug/num_tok_loss": 1736.0, "debug/num_tok_total": 2219.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.147263526916504, "train/diffusion_loss": 0.43085482716560364 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1685.0, "debug/num_lat_total": 2033.0, "debug/num_tok_loss": 1685.0, "debug/num_tok_total": 2033.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.185604095458984, "train/diffusion_loss": 0.4586755037307739 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1719.0, "debug/num_lat_total": 2199.0, "debug/num_tok_loss": 1719.0, "debug/num_tok_total": 2199.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.170775413513184, "train/diffusion_loss": 0.4456702172756195 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1749.0, "debug/num_lat_total": 2183.0, "debug/num_tok_loss": 1749.0, "debug/num_tok_total": 2183.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.213212966918945, "train/diffusion_loss": 0.4502941966056824 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1718.0, "debug/num_lat_total": 2255.0, "debug/num_tok_loss": 1718.0, "debug/num_tok_total": 2255.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.188613891601562, "train/diffusion_loss": 0.428547739982605 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1810.0, "debug/num_lat_total": 2148.0, "debug/num_tok_loss": 1810.0, "debug/num_tok_total": 2148.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.191283226013184, "train/diffusion_loss": 0.4674947261810303 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1694.0, "debug/num_lat_total": 2097.0, "debug/num_tok_loss": 1694.0, "debug/num_tok_total": 2097.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.186917304992676, "train/diffusion_loss": 0.4404945969581604 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1772.0, "debug/num_lat_total": 2293.0, "debug/num_tok_loss": 1772.0, "debug/num_tok_total": 2293.0, "epoch": 4.747663551401869, "step": 33 }, { "epoch": 4.747663551401869, "step": 33, "train/ce_loss": 12.178369522094727, "train/diffusion_loss": 0.45209217071533203 }, { "epoch": 4.747663551401869, "step": 33, "train/learning_rate_real": 2.258912842161662e-07 }, { "debug/num_lat_loss": 1762.0, "debug/num_lat_total": 2343.0, "debug/num_tok_loss": 1762.0, "debug/num_tok_total": 2343.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.175055503845215, "train/diffusion_loss": 0.44022753834724426 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1747.0, "debug/num_lat_total": 2348.0, "debug/num_tok_loss": 1747.0, "debug/num_tok_total": 2348.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.170550346374512, "train/diffusion_loss": 0.42128288745880127 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1708.0, "debug/num_lat_total": 2088.0, "debug/num_tok_loss": 1708.0, "debug/num_tok_total": 2088.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.167820930480957, "train/diffusion_loss": 0.44098177552223206 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2002.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2002.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.243925094604492, "train/diffusion_loss": 0.44518256187438965 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1705.0, "debug/num_lat_total": 2094.0, "debug/num_tok_loss": 1705.0, "debug/num_tok_total": 2094.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.187417030334473, "train/diffusion_loss": 0.41937023401260376 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1765.0, "debug/num_lat_total": 2297.0, "debug/num_tok_loss": 1765.0, "debug/num_tok_total": 2297.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.190154075622559, "train/diffusion_loss": 0.41703900694847107 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1782.0, "debug/num_lat_total": 2235.0, "debug/num_tok_loss": 1782.0, "debug/num_tok_total": 2235.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.209789276123047, "train/diffusion_loss": 0.4473593533039093 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1774.0, "debug/num_lat_total": 2079.0, "debug/num_tok_loss": 1774.0, "debug/num_tok_total": 2079.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.186965942382812, "train/diffusion_loss": 0.47279080748558044 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1723.0, "debug/num_lat_total": 2052.0, "debug/num_tok_loss": 1723.0, "debug/num_tok_total": 2052.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.189580917358398, "train/diffusion_loss": 0.45107120275497437 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 1821.0, "debug/num_lat_total": 2294.0, "debug/num_tok_loss": 1821.0, "debug/num_tok_total": 2294.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.185731887817383, "train/diffusion_loss": 0.4438938796520233 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 }, { "debug/num_lat_loss": 221.0, "debug/num_lat_total": 275.0, "debug/num_tok_loss": 221.0, "debug/num_tok_total": 275.0, "epoch": 4.897196261682243, "step": 34 }, { "epoch": 4.897196261682243, "step": 34, "train/ce_loss": 12.213896751403809, "train/diffusion_loss": 0.4825238287448883 }, { "epoch": 4.897196261682243, "step": 34, "train/learning_rate_real": 5.6600967836442644e-08 } ], "logging_steps": 10, "max_steps": 35, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.708115641912502e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }