{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:11:01.824634', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-09-30 22:11:01.832069', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.014772', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 1.0141518115997314, 'timestamp': '2025-09-30 22:11:02.017788', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.099418', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 1.0238981246948242, 'timestamp': '2025-09-30 22:11:02.114769', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:02.185448', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 1.0061830282211304, 'timestamp': '2025-09-30 22:11:02.189853', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:02.255223', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.976143479347229, 'timestamp': '2025-09-30 22:11:02.314818', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:02.377812', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.8643914461135864, 'timestamp': '2025-09-30 22:11:02.382638', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.448858', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.6396591067314148, 'timestamp': '2025-09-30 22:11:02.461410', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.527111', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.7019064426422119, 'timestamp': '2025-09-30 22:11:02.530749', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.588522', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.8302102088928223, 'timestamp': '2025-09-30 22:11:02.599223', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.657305', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.48409557342529297, 'timestamp': '2025-09-30 22:11:02.661735', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:02.730109', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.5603610873222351, 'timestamp': '2025-09-30 22:11:02.734772', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:02.801189', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.49983760714530945, 'timestamp': '2025-09-30 22:11:02.805541', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.863227', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.5132840871810913, 'timestamp': '2025-09-30 22:11:02.872402', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:02.932444', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.32157471776008606, 'timestamp': '2025-09-30 22:11:02.936011', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:02.993978', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.34299203753471375, 'timestamp': '2025-09-30 22:11:02.998248', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:03.055851', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.3779154121875763, 'timestamp': '2025-09-30 22:11:03.060141', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:03.117801', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.41543447971343994, 'timestamp': '2025-09-30 22:11:03.133407', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:03.189389', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.3043370246887207, 'timestamp': '2025-09-30 22:11:03.194230', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:03.251103', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.3361082971096039, 'timestamp': '2025-09-30 22:11:03.254723', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:03.335233', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.414169579744339, 'timestamp': '2025-09-30 22:11:03.339342', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:03.396666', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.3210541605949402, 'timestamp': '2025-09-30 22:11:03.407281', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:03.472555', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.477262020111084, 'timestamp': '2025-09-30 22:11:03.476690', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:03.533968', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.34772127866744995, 'timestamp': '2025-09-30 22:11:03.537099', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:03.598497', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.25287681818008423, 'timestamp': '2025-09-30 22:11:03.610253', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:03.669306', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.38128381967544556, 'timestamp': '2025-09-30 22:11:03.678653', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:03.738441', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.27589669823646545, 'timestamp': '2025-09-30 22:11:03.742282', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:03.816930', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.3020755350589752, 'timestamp': '2025-09-30 22:11:03.821403', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:03.878603', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.40392544865608215, 'timestamp': '2025-09-30 22:11:03.884813', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:03.946772', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.351497083902359, 'timestamp': '2025-09-30 22:11:03.954590', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:04.010948', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.18241287767887115, 'timestamp': '2025-09-30 22:11:04.015639', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:04.073325', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.15925690531730652, 'timestamp': '2025-09-30 22:11:04.076600', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.133793', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.26860371232032776, 'timestamp': '2025-09-30 22:11:04.137501', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:04.195402', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.2407626211643219, 'timestamp': '2025-09-30 22:11:04.202873', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.260204', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.3036354184150696, 'timestamp': '2025-09-30 22:11:04.265286', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.322561', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.3656114339828491, 'timestamp': '2025-09-30 22:11:04.326189', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:04.384007', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.35570228099823, 'timestamp': '2025-09-30 22:11:04.387374', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:04.444310', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.24182432889938354, 'timestamp': '2025-09-30 22:11:04.451794', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.509278', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.3558001220226288, 'timestamp': '2025-09-30 22:11:04.512535', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:04.580733', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.2427128106355667, 'timestamp': '2025-09-30 22:11:04.584446', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:04.644211', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.37081560492515564, 'timestamp': '2025-09-30 22:11:04.648957', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.709811', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.29114052653312683, 'timestamp': '2025-09-30 22:11:04.717157', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:04.774248', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.20639586448669434, 'timestamp': '2025-09-30 22:11:04.779004', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.840050', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.25706276297569275, 'timestamp': '2025-09-30 22:11:04.844321', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.902714', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.20794637501239777, 'timestamp': '2025-09-30 22:11:04.906252', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:04.972371', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.31174248456954956, 'timestamp': '2025-09-30 22:11:04.980751', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.039772', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.2816590368747711, 'timestamp': '2025-09-30 22:11:05.044179', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:05.104930', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.26323047280311584, 'timestamp': '2025-09-30 22:11:05.109095', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:05.167977', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.24142582714557648, 'timestamp': '2025-09-30 22:11:05.179764', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.238917', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.31557297706604004, 'timestamp': '2025-09-30 22:11:05.246195', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:05.304321', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.2089298814535141, 'timestamp': '2025-09-30 22:11:05.307810', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.366154', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.21469223499298096, 'timestamp': '2025-09-30 22:11:05.371614', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:05.428823', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.2581331133842468, 'timestamp': '2025-09-30 22:11:05.432942', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.491854', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.36290204524993896, 'timestamp': '2025-09-30 22:11:05.501116', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.561775', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.23279844224452972, 'timestamp': '2025-09-30 22:11:05.566157', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:05.634446', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.2208818644285202, 'timestamp': '2025-09-30 22:11:05.638355', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:05.696078', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.277754545211792, 'timestamp': '2025-09-30 22:11:05.699466', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:05.773352', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.1596258282661438, 'timestamp': '2025-09-30 22:11:05.780690', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:05.838877', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.1745772808790207, 'timestamp': '2025-09-30 22:11:05.846754', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:05.903449', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.2276635766029358, 'timestamp': '2025-09-30 22:11:05.906146', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:05.963504', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.22495746612548828, 'timestamp': '2025-09-30 22:11:05.971990', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:06.034749', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.1820623129606247, 'timestamp': '2025-09-30 22:11:06.042436', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:06.099546', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.19335399568080902, 'timestamp': '2025-09-30 22:11:06.102243', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:06.159591', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.16704414784908295, 'timestamp': '2025-09-30 22:11:06.168132', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:06.224555', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.2208624631166458, 'timestamp': '2025-09-30 22:11:06.230344', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.286527', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.2863380014896393, 'timestamp': '2025-09-30 22:11:06.293205', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:06.353435', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.1776004582643509, 'timestamp': '2025-09-30 22:11:06.356223', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:06.416385', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.22756150364875793, 'timestamp': '2025-09-30 22:11:06.419277', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:06.475987', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.2790071666240692, 'timestamp': '2025-09-30 22:11:06.479307', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.536932', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.22513777017593384, 'timestamp': '2025-09-30 22:11:06.544882', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:06.601199', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.23316918313503265, 'timestamp': '2025-09-30 22:11:06.605897', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.668235', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.2402951866388321, 'timestamp': '2025-09-30 22:11:06.672776', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.731092', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.18091554939746857, 'timestamp': '2025-09-30 22:11:06.734346', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.794243', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.2502370774745941, 'timestamp': '2025-09-30 22:11:06.800973', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:06.856854', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.19973407685756683, 'timestamp': '2025-09-30 22:11:06.859327', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:06.916317', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.23994876444339752, 'timestamp': '2025-09-30 22:11:06.918834', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:06.985574', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.2051016390323639, 'timestamp': '2025-09-30 22:11:06.988798', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:07.045891', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.35054367780685425, 'timestamp': '2025-09-30 22:11:07.054782', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:07.112163', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.16051612794399261, 'timestamp': '2025-09-30 22:11:07.115250', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:07.173884', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.254097580909729, 'timestamp': '2025-09-30 22:11:07.177510', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:07.235841', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.1868804693222046, 'timestamp': '2025-09-30 22:11:07.238468', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:07.297626', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.26402372121810913, 'timestamp': '2025-09-30 22:11:07.310629', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:07.367804', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.2612054944038391, 'timestamp': '2025-09-30 22:11:07.370904', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:07.441459', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.20258115231990814, 'timestamp': '2025-09-30 22:11:07.444388', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:07.500787', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.16176065802574158, 'timestamp': '2025-09-30 22:11:07.505064', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:07.561756', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.2818806767463684, 'timestamp': '2025-09-30 22:11:07.568254', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:07.626164', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.1716175526380539, 'timestamp': '2025-09-30 22:11:07.628905', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:07.687192', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.2042633295059204, 'timestamp': '2025-09-30 22:11:07.690620', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:07.747072', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.21300899982452393, 'timestamp': '2025-09-30 22:11:07.750939', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:07.807794', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.2377365082502365, 'timestamp': '2025-09-30 22:11:07.815723', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:07.870872', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.1656094789505005, 'timestamp': '2025-09-30 22:11:07.874515', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:07.941291', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.2620551586151123, 'timestamp': '2025-09-30 22:11:07.946561', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:08.004675', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.21174751222133636, 'timestamp': '2025-09-30 22:11:08.007769', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:08.065392', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.18516407907009125, 'timestamp': '2025-09-30 22:11:08.071582', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:08.130048', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.2963278889656067, 'timestamp': '2025-09-30 22:11:08.132799', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:08.190432', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.22709378600120544, 'timestamp': '2025-09-30 22:11:08.193811', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:08.250959', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.20586739480495453, 'timestamp': '2025-09-30 22:11:08.258589', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:08.314632', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.21622374653816223, 'timestamp': '2025-09-30 22:11:08.321612', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.379047', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.18783539533615112, 'timestamp': '2025-09-30 22:11:08.382857', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.439963', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.21617913246154785, 'timestamp': '2025-09-30 22:11:08.444915', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:08.502420', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.16479282081127167, 'timestamp': '2025-09-30 22:11:08.506635', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.563689', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.2363627701997757, 'timestamp': '2025-09-30 22:11:08.570556', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:08.626094', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.24795682728290558, 'timestamp': '2025-09-30 22:11:08.628569', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:08.688678', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.2421124130487442, 'timestamp': '2025-09-30 22:11:08.691759', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:08.748664', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.33260488510131836, 'timestamp': '2025-09-30 22:11:08.757952', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.815057', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.20684555172920227, 'timestamp': '2025-09-30 22:11:08.821811', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.882965', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.22343343496322632, 'timestamp': '2025-09-30 22:11:08.886535', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:08.943647', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.26267313957214355, 'timestamp': '2025-09-30 22:11:08.945877', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:09.001941', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.322679340839386, 'timestamp': '2025-09-30 22:11:09.005555', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.061947', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.35919368267059326, 'timestamp': '2025-09-30 22:11:09.067779', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:09.123834', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.18541350960731506, 'timestamp': '2025-09-30 22:11:09.126748', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:09.182459', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2728431820869446, 'timestamp': '2025-09-30 22:11:09.186023', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:09.242373', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.1482985019683838, 'timestamp': '2025-09-30 22:11:09.245452', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.312398', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.18701709806919098, 'timestamp': '2025-09-30 22:11:09.324940', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:09.383179', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.2394803911447525, 'timestamp': '2025-09-30 22:11:09.387992', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:09.444228', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.36096474528312683, 'timestamp': '2025-09-30 22:11:09.449771', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:09.506842', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.18447913229465485, 'timestamp': '2025-09-30 22:11:09.509629', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.565582', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.2672875225543976, 'timestamp': '2025-09-30 22:11:09.572955', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.628404', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.1931868940591812, 'timestamp': '2025-09-30 22:11:09.632580', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.696902', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.2186211794614792, 'timestamp': '2025-09-30 22:11:09.702025', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:09.760097', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.23733635246753693, 'timestamp': '2025-09-30 22:11:09.763104', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:09.820480', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.2797948718070984, 'timestamp': '2025-09-30 22:11:09.831444', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:09.886891', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.23722362518310547, 'timestamp': '2025-09-30 22:11:09.892501', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:09.950691', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.19958172738552094, 'timestamp': '2025-09-30 22:11:09.953723', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:10.011900', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.31227198243141174, 'timestamp': '2025-09-30 22:11:10.016077', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:10.072302', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.20264022052288055, 'timestamp': '2025-09-30 22:11:10.078957', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.135188', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.2355821281671524, 'timestamp': '2025-09-30 22:11:10.144184', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.206771', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.20606660842895508, 'timestamp': '2025-09-30 22:11:10.211374', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.270736', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.22938399016857147, 'timestamp': '2025-09-30 22:11:10.273946', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:10.329594', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.41237831115722656, 'timestamp': '2025-09-30 22:11:10.340317', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:10.396658', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.20980344712734222, 'timestamp': '2025-09-30 22:11:10.399970', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:10.456645', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.19251017272472382, 'timestamp': '2025-09-30 22:11:10.459187', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.523318', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.1744726300239563, 'timestamp': '2025-09-30 22:11:10.526101', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.581600', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.1995943784713745, 'timestamp': '2025-09-30 22:11:10.588651', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.645187', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.2519894540309906, 'timestamp': '2025-09-30 22:11:10.652878', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:10.709760', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.29408031702041626, 'timestamp': '2025-09-30 22:11:10.713512', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:10.778579', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.1828557252883911, 'timestamp': '2025-09-30 22:11:10.781693', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:10.840156', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.19846464693546295, 'timestamp': '2025-09-30 22:11:10.847072', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:10.903830', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.26094886660575867, 'timestamp': '2025-09-30 22:11:10.907990', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:10.965381', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.21346229314804077, 'timestamp': '2025-09-30 22:11:10.967970', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:11.023944', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.2197571098804474, 'timestamp': '2025-09-30 22:11:11.027204', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:11.085773', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.2778100073337555, 'timestamp': '2025-09-30 22:11:11.093546', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:11.151633', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.2327246069908142, 'timestamp': '2025-09-30 22:11:11.155058', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:11.219292', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.185882106423378, 'timestamp': '2025-09-30 22:11:11.226259', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:11.282902', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.22018082439899445, 'timestamp': '2025-09-30 22:11:11.285143', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:11.340855', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.29392629861831665, 'timestamp': '2025-09-30 22:11:11.351675', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:11.407717', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.21298232674598694, 'timestamp': '2025-09-30 22:11:11.410775', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:11.466291', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.2751856744289398, 'timestamp': '2025-09-30 22:11:11.468695', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:11.532373', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.2661009728908539, 'timestamp': '2025-09-30 22:11:11.536138', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:11.594460', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.2237762212753296, 'timestamp': '2025-09-30 22:11:11.609397', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:11.666744', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.13422060012817383, 'timestamp': '2025-09-30 22:11:11.670141', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:11.728229', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.26279130578041077, 'timestamp': '2025-09-30 22:11:11.737821', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:11.804509', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.16454952955245972, 'timestamp': '2025-09-30 22:11:11.811913', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:11.871660', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.1784753054380417, 'timestamp': '2025-09-30 22:11:11.878363', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:11.945128', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.2740514278411865, 'timestamp': '2025-09-30 22:11:11.951758', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:12.009507', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.29743313789367676, 'timestamp': '2025-09-30 22:11:12.014992', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:12.086536', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.3455103933811188, 'timestamp': '2025-09-30 22:11:12.090384', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:12.151403', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.23126749694347382, 'timestamp': '2025-09-30 22:11:12.159499', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:12.217411', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.1689598262310028, 'timestamp': '2025-09-30 22:11:12.227254', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:12.284864', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.25199952721595764, 'timestamp': '2025-09-30 22:11:12.291912', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:12.349084', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.18228891491889954, 'timestamp': '2025-09-30 22:11:12.354886', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:12.414205', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.2447935789823532, 'timestamp': '2025-09-30 22:11:12.421209', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:12.479118', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.2383670210838318, 'timestamp': '2025-09-30 22:11:12.483627', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:12.546236', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.34740445017814636, 'timestamp': '2025-09-30 22:11:12.551400', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:12.620483', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.20269368588924408, 'timestamp': '2025-09-30 22:11:12.633155', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:12.691953', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.17137548327445984, 'timestamp': '2025-09-30 22:11:12.697984', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:12.759931', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.27755600214004517, 'timestamp': '2025-09-30 22:11:12.763496', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:12.829903', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.16810278594493866, 'timestamp': '2025-09-30 22:11:12.837858', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:12.901077', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.13301169872283936, 'timestamp': '2025-09-30 22:11:12.904938', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:12.961815', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.22856397926807404, 'timestamp': '2025-09-30 22:11:12.972015', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.036060', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.14929096400737762, 'timestamp': '2025-09-30 22:11:13.041724', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:13.098641', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.18548427522182465, 'timestamp': '2025-09-30 22:11:13.106198', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.172331', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.1787216067314148, 'timestamp': '2025-09-30 22:11:13.176232', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.239991', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.3090249001979828, 'timestamp': '2025-09-30 22:11:13.248994', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.305738', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.28042274713516235, 'timestamp': '2025-09-30 22:11:13.309115', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.374387', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.2563169598579407, 'timestamp': '2025-09-30 22:11:13.376891', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.432866', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.28195837140083313, 'timestamp': '2025-09-30 22:11:13.438542', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.495895', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.2768062651157379, 'timestamp': '2025-09-30 22:11:13.503211', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:13.558441', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.26894432306289673, 'timestamp': '2025-09-30 22:11:13.566232', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.623067', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.1682896465063095, 'timestamp': '2025-09-30 22:11:13.629031', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.685718', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.23258423805236816, 'timestamp': '2025-09-30 22:11:13.690633', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:13.748628', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18135759234428406, 'timestamp': '2025-09-30 22:11:13.756281', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:13.812391', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.2561184763908386, 'timestamp': '2025-09-30 22:11:13.815462', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.871259', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.17158576846122742, 'timestamp': '2025-09-30 22:11:13.875093', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.932072', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.23038610816001892, 'timestamp': '2025-09-30 22:11:13.935425', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:13.991969', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.16770082712173462, 'timestamp': '2025-09-30 22:11:13.999312', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:14.056329', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.2018687129020691, 'timestamp': '2025-09-30 22:11:14.059700', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:14.120719', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.1412816047668457, 'timestamp': '2025-09-30 22:11:14.124289', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:14.187628', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.19572943449020386, 'timestamp': '2025-09-30 22:11:14.191759', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:14.249527', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.2008201628923416, 'timestamp': '2025-09-30 22:11:14.256879', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:14.314011', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.25130215287208557, 'timestamp': '2025-09-30 22:11:14.318979', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:14.376104', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.33334869146347046, 'timestamp': '2025-09-30 22:11:14.387073', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:14.445023', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.15538795292377472, 'timestamp': '2025-09-30 22:11:14.448634', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:14.506104', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.33727020025253296, 'timestamp': '2025-09-30 22:11:14.512171', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:14.567945', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.241719588637352, 'timestamp': '2025-09-30 22:11:14.570434', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:14.632009', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.25095298886299133, 'timestamp': '2025-09-30 22:11:14.642901', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:14.720536', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.19867148995399475, 'timestamp': '2025-09-30 22:11:14.723593', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:14.781112', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.22967572510242462, 'timestamp': '2025-09-30 22:11:14.787414', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:14.847520', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.20597897469997406, 'timestamp': '2025-09-30 22:11:14.850499', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:14.907670', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.32084453105926514, 'timestamp': '2025-09-30 22:11:14.910898', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:14.976501', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.15583010017871857, 'timestamp': '2025-09-30 22:11:14.980066', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:15.037895', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.15098260343074799, 'timestamp': '2025-09-30 22:11:15.044445', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:15.107648', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.3429071605205536, 'timestamp': '2025-09-30 22:11:15.112653', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:15.171017', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.20016522705554962, 'timestamp': '2025-09-30 22:11:15.175978', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:15.232937', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.14551575481891632, 'timestamp': '2025-09-30 22:11:15.237127', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:15.294646', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.260636568069458, 'timestamp': '2025-09-30 22:11:15.302652', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.358433', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.34972038865089417, 'timestamp': '2025-09-30 22:11:15.361637', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.418532', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.15546295046806335, 'timestamp': '2025-09-30 22:11:15.421769', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:15.478951', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.20001843571662903, 'timestamp': '2025-09-30 22:11:15.482237', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.543960', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.18098364770412445, 'timestamp': '2025-09-30 22:11:15.556369', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.615458', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.20800213515758514, 'timestamp': '2025-09-30 22:11:15.619732', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.681629', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.3534855842590332, 'timestamp': '2025-09-30 22:11:15.690025', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:15.750700', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.15703463554382324, 'timestamp': '2025-09-30 22:11:15.753383', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:15.809536', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.2543777823448181, 'timestamp': '2025-09-30 22:11:15.816602', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:15.873345', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.16740776598453522, 'timestamp': '2025-09-30 22:11:15.877513', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:15.934190', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.263998806476593, 'timestamp': '2025-09-30 22:11:15.938460', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:15.998066', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.180010125041008, 'timestamp': '2025-09-30 22:11:16.005577', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.062950', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.30827653408050537, 'timestamp': '2025-09-30 22:11:16.069476', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.129306', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.20814362168312073, 'timestamp': '2025-09-30 22:11:16.132252', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.192651', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.2154381275177002, 'timestamp': '2025-09-30 22:11:16.196346', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.253481', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.32601702213287354, 'timestamp': '2025-09-30 22:11:16.257109', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.322957', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.2690701484680176, 'timestamp': '2025-09-30 22:11:16.329888', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.386290', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.20892056822776794, 'timestamp': '2025-09-30 22:11:16.389505', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.446783', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.13245879113674164, 'timestamp': '2025-09-30 22:11:16.454582', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.510563', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.22408424317836761, 'timestamp': '2025-09-30 22:11:16.514479', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.571117', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.2169708013534546, 'timestamp': '2025-09-30 22:11:16.577528', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.642365', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.1914329081773758, 'timestamp': '2025-09-30 22:11:16.648989', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.709074', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2776019871234894, 'timestamp': '2025-09-30 22:11:16.712146', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:16.774621', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.2331928163766861, 'timestamp': '2025-09-30 22:11:16.777737', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:16.836526', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.19476789236068726, 'timestamp': '2025-09-30 22:11:16.844756', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:16.902204', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.1911279708147049, 'timestamp': '2025-09-30 22:11:16.905143', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:16.965732', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2768174409866333, 'timestamp': '2025-09-30 22:11:16.969204', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.026114', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.18132886290550232, 'timestamp': '2025-09-30 22:11:17.028837', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.085957', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.28832772374153137, 'timestamp': '2025-09-30 22:11:17.092132', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:17.147859', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.23134827613830566, 'timestamp': '2025-09-30 22:11:17.152468', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:17.218706', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.17986898124217987, 'timestamp': '2025-09-30 22:11:17.221588', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:17.280061', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.26239052414894104, 'timestamp': '2025-09-30 22:11:17.283356', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:17.339116', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.151981383562088, 'timestamp': '2025-09-30 22:11:17.345127', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.401370', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.2517884075641632, 'timestamp': '2025-09-30 22:11:17.405530', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.461663', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.2701409161090851, 'timestamp': '2025-09-30 22:11:17.464103', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.522072', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.1703360378742218, 'timestamp': '2025-09-30 22:11:17.525067', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:17.584590', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.21378006041049957, 'timestamp': '2025-09-30 22:11:17.590950', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.647192', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.23271134495735168, 'timestamp': '2025-09-30 22:11:17.652365', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:17.709872', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.20449168980121613, 'timestamp': '2025-09-30 22:11:17.714537', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.778120', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.38010701537132263, 'timestamp': '2025-09-30 22:11:17.784214', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:17.845193', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.22545126080513, 'timestamp': '2025-09-30 22:11:17.853020', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:17.909686', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.28659889101982117, 'timestamp': '2025-09-30 22:11:17.914868', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:17.971765', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.18616420030593872, 'timestamp': '2025-09-30 22:11:17.976533', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.034383', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.1681555062532425, 'timestamp': '2025-09-30 22:11:18.039139', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.095968', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.13636483252048492, 'timestamp': '2025-09-30 22:11:18.109093', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:18.167890', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.27229952812194824, 'timestamp': '2025-09-30 22:11:18.172529', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:18.228056', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.219890296459198, 'timestamp': '2025-09-30 22:11:18.232643', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.290660', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.1968654841184616, 'timestamp': '2025-09-30 22:11:18.293397', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.350265', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.20262080430984497, 'timestamp': '2025-09-30 22:11:18.357584', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.412979', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.22817906737327576, 'timestamp': '2025-09-30 22:11:18.415950', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.472653', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.19948716461658478, 'timestamp': '2025-09-30 22:11:18.475643', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.535126', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.1307208240032196, 'timestamp': '2025-09-30 22:11:18.538255', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.600737', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.15340031683444977, 'timestamp': '2025-09-30 22:11:18.606704', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:18.668400', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.25713491439819336, 'timestamp': '2025-09-30 22:11:18.671345', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.728464', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.14927756786346436, 'timestamp': '2025-09-30 22:11:18.731570', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.788965', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.1329701542854309, 'timestamp': '2025-09-30 22:11:18.795523', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.857061', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.2527112662792206, 'timestamp': '2025-09-30 22:11:18.863779', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:18.919053', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.16915465891361237, 'timestamp': '2025-09-30 22:11:18.924290', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:18.988142', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.17409731447696686, 'timestamp': '2025-09-30 22:11:18.992011', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:19.047992', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.21739132702350616, 'timestamp': '2025-09-30 22:11:19.052178', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:11:19.137983', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.24137383699417114, 'timestamp': '2025-09-30 22:11:19.145649', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:19.202851', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.23258869349956512, 'timestamp': '2025-09-30 22:11:19.206727', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:19.264733', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.15647302567958832, 'timestamp': '2025-09-30 22:11:19.269626', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:19.329833', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.1386183798313141, 'timestamp': '2025-09-30 22:11:19.335351', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:19.392823', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.2754950225353241, 'timestamp': '2025-09-30 22:11:19.399369', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:19.456122', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.18118591606616974, 'timestamp': '2025-09-30 22:11:19.463904', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:19.522444', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.16827140748500824, 'timestamp': '2025-09-30 22:11:19.525021', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:19.582179', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.25525569915771484, 'timestamp': '2025-09-30 22:11:19.586753', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:19.649200', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.24110642075538635, 'timestamp': '2025-09-30 22:11:19.656647', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:19.719382', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.1429058313369751, 'timestamp': '2025-09-30 22:11:19.723375', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:19.787110', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.20561380684375763, 'timestamp': '2025-09-30 22:11:19.791979', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:19.848164', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.18233153223991394, 'timestamp': '2025-09-30 22:11:19.850360', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:19.907389', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.15620480477809906, 'timestamp': '2025-09-30 22:11:19.913408', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:19.969339', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.1476401835680008, 'timestamp': '2025-09-30 22:11:19.972911', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:20.029310', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.2891086935997009, 'timestamp': '2025-09-30 22:11:20.031923', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:20.089523', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.19659139215946198, 'timestamp': '2025-09-30 22:11:20.094765', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:20.151901', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.21668963134288788, 'timestamp': '2025-09-30 22:11:20.159278', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:20.225803', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.17899920046329498, 'timestamp': '2025-09-30 22:11:20.231451', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:20.288516', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.17275370657444, 'timestamp': '2025-09-30 22:11:20.290756', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:20.349528', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.19185052812099457, 'timestamp': '2025-09-30 22:11:20.352012', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:20.407843', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.19250322878360748, 'timestamp': '2025-09-30 22:11:20.413887', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:20.480322', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.22257353365421295, 'timestamp': '2025-09-30 22:11:20.484992', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:20.549415', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.261634886264801, 'timestamp': '2025-09-30 22:11:20.555035', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:20.614118', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.3201264441013336, 'timestamp': '2025-09-30 22:11:20.626364', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:20.683648', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.33247873187065125, 'timestamp': '2025-09-30 22:11:20.690865', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:20.750670', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.27441027760505676, 'timestamp': '2025-09-30 22:11:20.754895', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:20.813703', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.17650139331817627, 'timestamp': '2025-09-30 22:11:20.816382', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:20.873150', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.173254132270813, 'timestamp': '2025-09-30 22:11:20.875596', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:20.932659', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.27710118889808655, 'timestamp': '2025-09-30 22:11:20.942770', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:21.002816', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.33714139461517334, 'timestamp': '2025-09-30 22:11:21.017309', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.081106', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.20850998163223267, 'timestamp': '2025-09-30 22:11:21.090800', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:21.167301', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.17899355292320251, 'timestamp': '2025-09-30 22:11:21.172928', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.240737', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.16764861345291138, 'timestamp': '2025-09-30 22:11:21.248172', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:21.305631', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.17565025389194489, 'timestamp': '2025-09-30 22:11:21.323395', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.385777', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13605119287967682, 'timestamp': '2025-09-30 22:11:21.388750', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:21.453727', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.2532873749732971, 'timestamp': '2025-09-30 22:11:21.460448', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.520518', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.2075301706790924, 'timestamp': '2025-09-30 22:11:21.532395', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.599498', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.2832823693752289, 'timestamp': '2025-09-30 22:11:21.604129', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:21.665594', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.24607689678668976, 'timestamp': '2025-09-30 22:11:21.673464', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:21.732523', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.1921357363462448, 'timestamp': '2025-09-30 22:11:21.754568', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:21.814617', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.1992136538028717, 'timestamp': '2025-09-30 22:11:21.845063', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:11:21.944936', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.22195610404014587, 'timestamp': '2025-09-30 22:11:21.960675', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.022946', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.290060818195343, 'timestamp': '2025-09-30 22:11:22.036145', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:22.120739', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.22888252139091492, 'timestamp': '2025-09-30 22:11:22.129279', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:22.196267', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.1670190542936325, 'timestamp': '2025-09-30 22:11:22.204493', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.260130', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.16609983146190643, 'timestamp': '2025-09-30 22:11:22.262985', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:22.319549', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.22837130725383759, 'timestamp': '2025-09-30 22:11:22.326290', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.383413', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.20764167606830597, 'timestamp': '2025-09-30 22:11:22.386986', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:22.454329', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.18577103316783905, 'timestamp': '2025-09-30 22:11:22.468059', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.533607', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.13887791335582733, 'timestamp': '2025-09-30 22:11:22.538442', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.600747', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20951662957668304, 'timestamp': '2025-09-30 22:11:22.604096', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:22.661525', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.26427268981933594, 'timestamp': '2025-09-30 22:11:22.665215', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:22.724518', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.15126675367355347, 'timestamp': '2025-09-30 22:11:22.733006', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:22.790108', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.1798294186592102, 'timestamp': '2025-09-30 22:11:22.796755', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:22.852223', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.13435910642147064, 'timestamp': '2025-09-30 22:11:22.854921', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.912166', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.2153344601392746, 'timestamp': '2025-09-30 22:11:22.915162', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:22.971053', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.25850650668144226, 'timestamp': '2025-09-30 22:11:22.978127', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:23.040088', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.21808592975139618, 'timestamp': '2025-09-30 22:11:23.044355', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:23.101230', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.1806134730577469, 'timestamp': '2025-09-30 22:11:23.105425', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:23.172290', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.2112140655517578, 'timestamp': '2025-09-30 22:11:23.182012', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:23.239063', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.18559734523296356, 'timestamp': '2025-09-30 22:11:23.245661', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:23.306040', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.16626913845539093, 'timestamp': '2025-09-30 22:11:23.308870', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:23.366305', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.29858478903770447, 'timestamp': '2025-09-30 22:11:23.368591', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:23.426311', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.12039308995008469, 'timestamp': '2025-09-30 22:11:23.428573', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:23.489231', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.22699138522148132, 'timestamp': '2025-09-30 22:11:23.495522', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:23.551369', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.2540915906429291, 'timestamp': '2025-09-30 22:11:23.560394', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:23.616529', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.1727103590965271, 'timestamp': '2025-09-30 22:11:23.623435', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:23.683080', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.1724255532026291, 'timestamp': '2025-09-30 22:11:23.688963', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:23.745722', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.25627776980400085, 'timestamp': '2025-09-30 22:11:23.755369', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:23.810742', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.1393745094537735, 'timestamp': '2025-09-30 22:11:23.817731', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:23.877834', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.2699994444847107, 'timestamp': '2025-09-30 22:11:23.881317', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:23.937270', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.127903550863266, 'timestamp': '2025-09-30 22:11:23.940202', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:23.997313', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.20221173763275146, 'timestamp': '2025-09-30 22:11:24.003519', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:24.058841', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.20223769545555115, 'timestamp': '2025-09-30 22:11:24.061909', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:24.122599', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.16542968153953552, 'timestamp': '2025-09-30 22:11:24.126194', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:24.188083', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.30943337082862854, 'timestamp': '2025-09-30 22:11:24.191355', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:24.251549', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.2727980315685272, 'timestamp': '2025-09-30 22:11:24.257284', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:24.315429', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.25001317262649536, 'timestamp': '2025-09-30 22:11:24.318679', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:24.378199', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.16744406521320343, 'timestamp': '2025-09-30 22:11:24.380528', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:24.441333', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.21455025672912598, 'timestamp': '2025-09-30 22:11:24.444907', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:24.501467', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.13912183046340942, 'timestamp': '2025-09-30 22:11:24.512661', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:24.575546', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.270215779542923, 'timestamp': '2025-09-30 22:11:24.578410', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:24.649448', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.2640071213245392, 'timestamp': '2025-09-30 22:11:24.655876', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:24.712444', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.2615017890930176, 'timestamp': '2025-09-30 22:11:24.715900', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:24.771927', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.2283397763967514, 'timestamp': '2025-09-30 22:11:24.787698', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:24.844272', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.193696528673172, 'timestamp': '2025-09-30 22:11:24.849285', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:24.906203', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.17072194814682007, 'timestamp': '2025-09-30 22:11:24.910134', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:24.968646', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.1669558733701706, 'timestamp': '2025-09-30 22:11:24.971320', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:25.027171', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.19179019331932068, 'timestamp': '2025-09-30 22:11:25.033547', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:25.088551', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.13368794322013855, 'timestamp': '2025-09-30 22:11:25.091513', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:25.150384', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11803465336561203, 'timestamp': '2025-09-30 22:11:25.152909', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:25.209373', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.19764527678489685, 'timestamp': '2025-09-30 22:11:25.211960', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:25.273513', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.3537570536136627, 'timestamp': '2025-09-30 22:11:25.279181', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:25.335693', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.21379049122333527, 'timestamp': '2025-09-30 22:11:25.338637', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:25.394672', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08651112765073776, 'timestamp': '2025-09-30 22:11:25.396881', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:25.454579', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.21354429423809052, 'timestamp': '2025-09-30 22:11:25.457094', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:25.513590', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.21610897779464722, 'timestamp': '2025-09-30 22:11:25.520084', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:25.579142', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.16844961047172546, 'timestamp': '2025-09-30 22:11:25.582446', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:25.651364', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.21439383924007416, 'timestamp': '2025-09-30 22:11:25.655860', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:25.713125', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.15779002010822296, 'timestamp': '2025-09-30 22:11:25.716391', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:25.772859', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.18669357895851135, 'timestamp': '2025-09-30 22:11:25.779591', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:25.836336', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.25129106640815735, 'timestamp': '2025-09-30 22:11:25.839241', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:25.900714', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.19947904348373413, 'timestamp': '2025-09-30 22:11:25.903035', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:25.960057', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.33611804246902466, 'timestamp': '2025-09-30 22:11:25.962777', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.020452', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.13253681361675262, 'timestamp': '2025-09-30 22:11:26.026010', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.085051', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.23276959359645844, 'timestamp': '2025-09-30 22:11:26.088064', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.150676', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12855221331119537, 'timestamp': '2025-09-30 22:11:26.153242', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.215589', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.22486698627471924, 'timestamp': '2025-09-30 22:11:26.227994', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.287316', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.25658881664276123, 'timestamp': '2025-09-30 22:11:26.321477', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:26.379947', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14332681894302368, 'timestamp': '2025-09-30 22:11:26.383361', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.449183', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.21333445608615875, 'timestamp': '2025-09-30 22:11:26.455640', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.513379', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.14784128963947296, 'timestamp': '2025-09-30 22:11:26.517299', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.578876', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.15252567827701569, 'timestamp': '2025-09-30 22:11:26.586380', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.643955', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.19798697531223297, 'timestamp': '2025-09-30 22:11:26.653295', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.710476', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.16836285591125488, 'timestamp': '2025-09-30 22:11:26.713738', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.770179', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.2019079625606537, 'timestamp': '2025-09-30 22:11:26.772894', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:26.829683', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.29172247648239136, 'timestamp': '2025-09-30 22:11:26.835074', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:26.891365', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.2499861866235733, 'timestamp': '2025-09-30 22:11:26.894510', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:26.953825', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.32544687390327454, 'timestamp': '2025-09-30 22:11:26.957512', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:27.014969', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.16003187000751495, 'timestamp': '2025-09-30 22:11:27.018670', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.076419', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.2227933555841446, 'timestamp': '2025-09-30 22:11:27.083459', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:27.146941', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.19918890297412872, 'timestamp': '2025-09-30 22:11:27.150947', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:27.210533', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.17657753825187683, 'timestamp': '2025-09-30 22:11:27.213558', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.274642', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.13988088071346283, 'timestamp': '2025-09-30 22:11:27.277557', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:27.341713', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.26247745752334595, 'timestamp': '2025-09-30 22:11:27.356684', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.415522', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.17975641787052155, 'timestamp': '2025-09-30 22:11:27.420262', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:27.477113', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.21223507821559906, 'timestamp': '2025-09-30 22:11:27.483031', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.539653', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.24385586380958557, 'timestamp': '2025-09-30 22:11:27.542526', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:27.598961', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.26097068190574646, 'timestamp': '2025-09-30 22:11:27.605312', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:27.668619', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17873819172382355, 'timestamp': '2025-09-30 22:11:27.671695', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.730859', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.13879646360874176, 'timestamp': '2025-09-30 22:11:27.733636', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:27.790516', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.306294322013855, 'timestamp': '2025-09-30 22:11:27.793779', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:27.862567', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.12026812881231308, 'timestamp': '2025-09-30 22:11:27.869518', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:27.925114', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.13630811870098114, 'timestamp': '2025-09-30 22:11:27.929108', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:27.986406', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.26638561487197876, 'timestamp': '2025-09-30 22:11:27.993021', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.060615', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.2179136425256729, 'timestamp': '2025-09-30 22:11:28.063590', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:28.119817', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.14066891372203827, 'timestamp': '2025-09-30 22:11:28.125593', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:28.182051', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.2623215913772583, 'timestamp': '2025-09-30 22:11:28.186513', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:28.247782', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.2026829868555069, 'timestamp': '2025-09-30 22:11:28.257946', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:28.338204', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.17577813565731049, 'timestamp': '2025-09-30 22:11:28.350435', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.425322', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.14684316515922546, 'timestamp': '2025-09-30 22:11:28.432028', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.496682', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.21236492693424225, 'timestamp': '2025-09-30 22:11:28.498813', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:28.557895', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.20859956741333008, 'timestamp': '2025-09-30 22:11:28.560117', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.623893', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.15445782244205475, 'timestamp': '2025-09-30 22:11:28.626144', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:28.685063', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.18888531625270844, 'timestamp': '2025-09-30 22:11:28.691573', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:28.754914', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.14307014644145966, 'timestamp': '2025-09-30 22:11:28.757905', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.829180', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.242135152220726, 'timestamp': '2025-09-30 22:11:28.831651', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:28.891028', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.24073399603366852, 'timestamp': '2025-09-30 22:11:28.893498', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:28.949419', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.20160427689552307, 'timestamp': '2025-09-30 22:11:28.955095', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:29.012042', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.2022104263305664, 'timestamp': '2025-09-30 22:11:29.017403', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:29.073518', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.22617165744304657, 'timestamp': '2025-09-30 22:11:29.076358', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:29.132551', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.17875288426876068, 'timestamp': '2025-09-30 22:11:29.134970', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.193653', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.16964362561702728, 'timestamp': '2025-09-30 22:11:29.198913', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:29.257239', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.12425708770751953, 'timestamp': '2025-09-30 22:11:29.260746', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:29.317176', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.23233745992183685, 'timestamp': '2025-09-30 22:11:29.319620', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:29.376513', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.12206462770700455, 'timestamp': '2025-09-30 22:11:29.378543', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:29.433500', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.16775217652320862, 'timestamp': '2025-09-30 22:11:29.439713', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.495053', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.17165979743003845, 'timestamp': '2025-09-30 22:11:29.498153', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:29.555239', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.22416794300079346, 'timestamp': '2025-09-30 22:11:29.557744', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.619761', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.26233091950416565, 'timestamp': '2025-09-30 22:11:29.622374', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.678979', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.161393404006958, 'timestamp': '2025-09-30 22:11:29.685947', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:29.742124', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.1541907787322998, 'timestamp': '2025-09-30 22:11:29.745127', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.800978', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.21274234354496002, 'timestamp': '2025-09-30 22:11:29.806934', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:29.863103', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.32999077439308167, 'timestamp': '2025-09-30 22:11:29.866517', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:29.922591', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.24105632305145264, 'timestamp': '2025-09-30 22:11:29.928889', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:29.987818', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.17914296686649323, 'timestamp': '2025-09-30 22:11:29.991228', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.050473', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.13150016963481903, 'timestamp': '2025-09-30 22:11:30.052604', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:30.110108', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.23090338706970215, 'timestamp': '2025-09-30 22:11:30.127080', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.182956', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.15982288122177124, 'timestamp': '2025-09-30 22:11:30.197230', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:30.253219', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.1499449908733368, 'timestamp': '2025-09-30 22:11:30.257018', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:30.312526', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.1615850031375885, 'timestamp': '2025-09-30 22:11:30.315581', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.372469', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.2992624044418335, 'timestamp': '2025-09-30 22:11:30.374901', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:30.430768', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.11106555908918381, 'timestamp': '2025-09-30 22:11:30.436263', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:30.490954', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.23700077831745148, 'timestamp': '2025-09-30 22:11:30.494391', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:30.550405', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23316748440265656, 'timestamp': '2025-09-30 22:11:30.554619', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:30.612532', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.1475050002336502, 'timestamp': '2025-09-30 22:11:30.615645', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:30.675233', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.23778966069221497, 'timestamp': '2025-09-30 22:11:30.680986', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:30.743500', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17088578641414642, 'timestamp': '2025-09-30 22:11:30.746049', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.801692', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.16426484286785126, 'timestamp': '2025-09-30 22:11:30.803618', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.858749', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.17841152846813202, 'timestamp': '2025-09-30 22:11:30.863937', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.919384', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.21598389744758606, 'timestamp': '2025-09-30 22:11:30.925763', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:30.982115', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.26832911372184753, 'timestamp': '2025-09-30 22:11:30.984856', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:31.041855', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.22434476017951965, 'timestamp': '2025-09-30 22:11:31.045276', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:31.102137', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.2514825761318207, 'timestamp': '2025-09-30 22:11:31.116610', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:31.173484', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.18947283923625946, 'timestamp': '2025-09-30 22:11:31.180082', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:31.235541', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.2614597976207733, 'timestamp': '2025-09-30 22:11:31.238592', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:31.293826', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.17410224676132202, 'timestamp': '2025-09-30 22:11:31.298386', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:31.354569', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.17038941383361816, 'timestamp': '2025-09-30 22:11:31.357526', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:31.414358', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.23187260329723358, 'timestamp': '2025-09-30 22:11:31.420275', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:31.482627', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.23237115144729614, 'timestamp': '2025-09-30 22:11:31.485867', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:31.541844', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.19969430565834045, 'timestamp': '2025-09-30 22:11:31.549894', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:31.610795', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.23457977175712585, 'timestamp': '2025-09-30 22:11:31.621352', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:31.679238', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.2462366372346878, 'timestamp': '2025-09-30 22:11:31.689455', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:31.744670', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.24736006557941437, 'timestamp': '2025-09-30 22:11:31.753991', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:31.815657', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.18572001159191132, 'timestamp': '2025-09-30 22:11:31.818251', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:31.876198', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.24549803137779236, 'timestamp': '2025-09-30 22:11:31.879106', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:31.939278', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.24016529321670532, 'timestamp': '2025-09-30 22:11:31.949103', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:32.009360', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.2246263176202774, 'timestamp': '2025-09-30 22:11:32.013837', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:32.087700', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.24933776259422302, 'timestamp': '2025-09-30 22:11:32.091737', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:32.148536', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.14738507568836212, 'timestamp': '2025-09-30 22:11:32.151412', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:32.208256', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.2571263909339905, 'timestamp': '2025-09-30 22:11:32.215058', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:32.271584', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.15746770799160004, 'timestamp': '2025-09-30 22:11:32.274170', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:32.331939', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.19901254773139954, 'timestamp': '2025-09-30 22:11:32.338266', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:11:46.180187', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 8008.428901500123, 'timestamp': '2025-09-30 22:11:46.185836', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:46.257169', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.22832578420639038, 'timestamp': '2025-09-30 22:11:46.259123', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:46.336078', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.20699921250343323, 'timestamp': '2025-09-30 22:11:46.342934', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:46.418682', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.21099412441253662, 'timestamp': '2025-09-30 22:11:46.425764', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:46.510401', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.10680516809225082, 'timestamp': '2025-09-30 22:11:46.512915', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:46.609158', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.2314298301935196, 'timestamp': '2025-09-30 22:11:46.611455', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:46.692345', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.2410610467195511, 'timestamp': '2025-09-30 22:11:46.698382', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:46.772598', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.18822167813777924, 'timestamp': '2025-09-30 22:11:46.774501', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:46.849628', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.1078793928027153, 'timestamp': '2025-09-30 22:11:46.851971', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:46.918081', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.29667383432388306, 'timestamp': '2025-09-30 22:11:46.921623', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:47.011035', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.1580858677625656, 'timestamp': '2025-09-30 22:11:47.017417', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:47.073118', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.15708808600902557, 'timestamp': '2025-09-30 22:11:47.075536', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:47.139832', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.13160811364650726, 'timestamp': '2025-09-30 22:11:47.142441', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:47.200374', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.15509988367557526, 'timestamp': '2025-09-30 22:11:47.207569', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:47.263790', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.29463982582092285, 'timestamp': '2025-09-30 22:11:47.270584', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:47.326611', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.22011958062648773, 'timestamp': '2025-09-30 22:11:47.330021', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:47.384915', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.19036607444286346, 'timestamp': '2025-09-30 22:11:47.386933', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:47.442583', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.16020134091377258, 'timestamp': '2025-09-30 22:11:47.444959', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:47.500961', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.2716040015220642, 'timestamp': '2025-09-30 22:11:47.507453', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:47.570781', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.18507523834705353, 'timestamp': '2025-09-30 22:11:47.572847', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:47.628519', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.16691705584526062, 'timestamp': '2025-09-30 22:11:47.630494', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:47.686398', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.11469665169715881, 'timestamp': '2025-09-30 22:11:47.688775', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:47.743877', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.26138049364089966, 'timestamp': '2025-09-30 22:11:47.750231', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:47.805607', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.27453169226646423, 'timestamp': '2025-09-30 22:11:47.807575', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:47.863522', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.16601300239562988, 'timestamp': '2025-09-30 22:11:47.867542', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:47.925207', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.19831669330596924, 'timestamp': '2025-09-30 22:11:47.927101', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:47.982248', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.22294898331165314, 'timestamp': '2025-09-30 22:11:47.988088', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:48.042753', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.1284143477678299, 'timestamp': '2025-09-30 22:11:48.045757', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:48.102697', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.22961623966693878, 'timestamp': '2025-09-30 22:11:48.106543', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:48.164058', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.22985157370567322, 'timestamp': '2025-09-30 22:11:48.166148', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:48.222571', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.2386036366224289, 'timestamp': '2025-09-30 22:11:48.228106', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:48.283948', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.2685684561729431, 'timestamp': '2025-09-30 22:11:48.285887', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:48.342243', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.21564850211143494, 'timestamp': '2025-09-30 22:11:48.344159', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:48.399968', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.23572489619255066, 'timestamp': '2025-09-30 22:11:48.402112', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:48.457931', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.16945120692253113, 'timestamp': '2025-09-30 22:11:48.464010', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-30 22:11:48.873124', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:48.933958', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.10732848942279816, 'timestamp': '2025-09-30 22:11:48.936004', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:48.993365', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.22162623703479767, 'timestamp': '2025-09-30 22:11:48.996360', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:49.053254', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.15546900033950806, 'timestamp': '2025-09-30 22:11:49.056057', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:49.114297', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.15397600829601288, 'timestamp': '2025-09-30 22:11:49.121401', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:49.176700', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.15945206582546234, 'timestamp': '2025-09-30 22:11:49.179066', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:49.234664', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.21176856756210327, 'timestamp': '2025-09-30 22:11:49.236757', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.293968', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.27609795331954956, 'timestamp': '2025-09-30 22:11:49.295897', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.351550', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.13827118277549744, 'timestamp': '2025-09-30 22:11:49.358229', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:49.425578', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.1858249008655548, 'timestamp': '2025-09-30 22:11:49.427451', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.482751', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.14755333960056305, 'timestamp': '2025-09-30 22:11:49.484737', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:49.544852', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.12934467196464539, 'timestamp': '2025-09-30 22:11:49.546869', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.603508', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.15094828605651855, 'timestamp': '2025-09-30 22:11:49.609377', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.665290', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.27920183539390564, 'timestamp': '2025-09-30 22:11:49.667793', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:49.723994', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.1923038214445114, 'timestamp': '2025-09-30 22:11:49.729247', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:49.786687', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.21444940567016602, 'timestamp': '2025-09-30 22:11:49.788776', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:49.845267', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.21689292788505554, 'timestamp': '2025-09-30 22:11:49.850737', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.905751', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.24174869060516357, 'timestamp': '2025-09-30 22:11:49.907869', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:49.970105', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.12643255293369293, 'timestamp': '2025-09-30 22:11:49.972504', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:50.028716', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.21592941880226135, 'timestamp': '2025-09-30 22:11:50.030863', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:50.086613', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.1684340536594391, 'timestamp': '2025-09-30 22:11:50.092451', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:50.148071', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.15657339990139008, 'timestamp': '2025-09-30 22:11:50.150130', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:50.206545', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.2989821434020996, 'timestamp': '2025-09-30 22:11:50.208975', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:50.264541', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.20836928486824036, 'timestamp': '2025-09-30 22:11:50.266812', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:50.322965', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.19506777822971344, 'timestamp': '2025-09-30 22:11:50.328374', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:50.384352', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.11602387577295303, 'timestamp': '2025-09-30 22:11:50.386375', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:50.442765', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.24869710206985474, 'timestamp': '2025-09-30 22:11:50.445054', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:50.502216', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.19025100767612457, 'timestamp': '2025-09-30 22:11:50.505212', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:50.561343', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17381742596626282, 'timestamp': '2025-09-30 22:11:50.567139', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:50.623665', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.15637697279453278, 'timestamp': '2025-09-30 22:11:50.627806', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:50.683567', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.203086256980896, 'timestamp': '2025-09-30 22:11:50.685657', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:50.741251', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.15407977998256683, 'timestamp': '2025-09-30 22:11:50.743564', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:50.799553', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.1674392968416214, 'timestamp': '2025-09-30 22:11:50.805456', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:11:50.863463', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.1506372094154358, 'timestamp': '2025-09-30 22:11:50.865904', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:50.921887', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.20544810593128204, 'timestamp': '2025-09-30 22:11:50.924228', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:50.980571', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.3901692032814026, 'timestamp': '2025-09-30 22:11:50.982512', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:51.043379', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.18303437530994415, 'timestamp': '2025-09-30 22:11:51.049198', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.104737', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.13014936447143555, 'timestamp': '2025-09-30 22:11:51.106824', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.162855', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.09894213825464249, 'timestamp': '2025-09-30 22:11:51.165024', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.221438', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.2456396520137787, 'timestamp': '2025-09-30 22:11:51.223635', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:51.280335', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.19044259190559387, 'timestamp': '2025-09-30 22:11:51.286043', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.342574', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.20128829777240753, 'timestamp': '2025-09-30 22:11:51.344536', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.400894', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.1914825290441513, 'timestamp': '2025-09-30 22:11:51.403180', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.459725', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.13289803266525269, 'timestamp': '2025-09-30 22:11:51.462393', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:51.517903', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.21776214241981506, 'timestamp': '2025-09-30 22:11:51.523542', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:51.579130', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.18412846326828003, 'timestamp': '2025-09-30 22:11:51.581582', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.637156', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.21724660694599152, 'timestamp': '2025-09-30 22:11:51.639399', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:51.698320', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.1928125023841858, 'timestamp': '2025-09-30 22:11:51.700527', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:51.757133', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.27442798018455505, 'timestamp': '2025-09-30 22:11:51.762513', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:51.817878', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.2910762131214142, 'timestamp': '2025-09-30 22:11:51.823099', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.885679', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10550037771463394, 'timestamp': '2025-09-30 22:11:51.888017', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:51.943809', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.19241410493850708, 'timestamp': '2025-09-30 22:11:51.946484', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.002011', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.2311503291130066, 'timestamp': '2025-09-30 22:11:52.007695', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:52.062886', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.18203839659690857, 'timestamp': '2025-09-30 22:11:52.064897', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:52.120990', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.22657059133052826, 'timestamp': '2025-09-30 22:11:52.123760', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.180951', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.17828227579593658, 'timestamp': '2025-09-30 22:11:52.192268', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.254071', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.14461784064769745, 'timestamp': '2025-09-30 22:11:52.259849', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.316660', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.133847177028656, 'timestamp': '2025-09-30 22:11:52.320331', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:52.376439', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.10876987129449844, 'timestamp': '2025-09-30 22:11:52.378727', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.434772', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.15830232203006744, 'timestamp': '2025-09-30 22:11:52.436998', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:52.495605', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.2672060430049896, 'timestamp': '2025-09-30 22:11:52.500989', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.557049', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.17039863765239716, 'timestamp': '2025-09-30 22:11:52.564188', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:52.620187', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12232748419046402, 'timestamp': '2025-09-30 22:11:52.627521', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:52.689694', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.18838587403297424, 'timestamp': '2025-09-30 22:11:52.699336', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:52.759510', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.22326965630054474, 'timestamp': '2025-09-30 22:11:52.764989', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:11:52.820349', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.2903003394603729, 'timestamp': '2025-09-30 22:11:52.822390', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:52.883595', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.23259663581848145, 'timestamp': '2025-09-30 22:11:52.888570', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:52.946433', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.31461846828460693, 'timestamp': '2025-09-30 22:11:52.951214', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:53.008884', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.17392264306545258, 'timestamp': '2025-09-30 22:11:53.014706', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.077574', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.24572737514972687, 'timestamp': '2025-09-30 22:11:53.080272', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.144115', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.14351147413253784, 'timestamp': '2025-09-30 22:11:53.146625', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:53.203835', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.19056232273578644, 'timestamp': '2025-09-30 22:11:53.206400', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.262539', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.18534737825393677, 'timestamp': '2025-09-30 22:11:53.268600', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.323701', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.13859154284000397, 'timestamp': '2025-09-30 22:11:53.329363', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:53.385814', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.31004035472869873, 'timestamp': '2025-09-30 22:11:53.388248', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.444128', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.1356133222579956, 'timestamp': '2025-09-30 22:11:53.452535', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:53.508247', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.1962750107049942, 'timestamp': '2025-09-30 22:11:53.514031', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:53.570050', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.10071783512830734, 'timestamp': '2025-09-30 22:11:53.572230', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:53.630062', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.24459101259708405, 'timestamp': '2025-09-30 22:11:53.632522', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.688717', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.2543933689594269, 'timestamp': '2025-09-30 22:11:53.690910', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:53.755689', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17518532276153564, 'timestamp': '2025-09-30 22:11:53.761581', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:53.816437', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.15114398300647736, 'timestamp': '2025-09-30 22:11:53.818899', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:53.874918', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.12649154663085938, 'timestamp': '2025-09-30 22:11:53.876973', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.933316', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.20019802451133728, 'timestamp': '2025-09-30 22:11:53.935471', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:53.991059', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.20062975585460663, 'timestamp': '2025-09-30 22:11:53.996890', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:54.055743', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.1434326171875, 'timestamp': '2025-09-30 22:11:54.057895', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:11:54.118184', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12263672798871994, 'timestamp': '2025-09-30 22:11:54.120643', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:54.176202', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15446330606937408, 'timestamp': '2025-09-30 22:11:54.178340', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:54.234637', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.18004484474658966, 'timestamp': '2025-09-30 22:11:54.240333', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:54.301681', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.2449890822172165, 'timestamp': '2025-09-30 22:11:54.305235', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:54.361726', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.1838647425174713, 'timestamp': '2025-09-30 22:11:54.364281', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:54.425836', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.209991455078125, 'timestamp': '2025-09-30 22:11:54.429222', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:11:54.485516', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.1810920238494873, 'timestamp': '2025-09-30 22:11:54.491495', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:54.547358', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.23496204614639282, 'timestamp': '2025-09-30 22:11:54.549967', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:54.607114', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.1478070765733719, 'timestamp': '2025-09-30 22:11:54.609596', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:54.676327', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.15676279366016388, 'timestamp': '2025-09-30 22:11:54.679532', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:54.735672', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.36324793100357056, 'timestamp': '2025-09-30 22:11:54.741457', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:54.796864', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.18965314328670502, 'timestamp': '2025-09-30 22:11:54.799475', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:54.857793', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.22290930151939392, 'timestamp': '2025-09-30 22:11:54.859421', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:54.915276', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.2458212673664093, 'timestamp': '2025-09-30 22:11:54.917077', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:54.978344', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.2314823418855667, 'timestamp': '2025-09-30 22:11:54.984871', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.044876', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.16566340625286102, 'timestamp': '2025-09-30 22:11:55.047347', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:55.105737', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.29851388931274414, 'timestamp': '2025-09-30 22:11:55.109558', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:55.166348', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.2540399730205536, 'timestamp': '2025-09-30 22:11:55.168974', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.227392', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.16770128905773163, 'timestamp': '2025-09-30 22:11:55.232910', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:55.288308', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.1474665403366089, 'timestamp': '2025-09-30 22:11:55.290606', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.347900', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15128646790981293, 'timestamp': '2025-09-30 22:11:55.350641', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.406820', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.08400417864322662, 'timestamp': '2025-09-30 22:11:55.410355', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:55.467933', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.16043642163276672, 'timestamp': '2025-09-30 22:11:55.474240', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:55.530220', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.11801206320524216, 'timestamp': '2025-09-30 22:11:55.532197', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:55.588287', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.22792378067970276, 'timestamp': '2025-09-30 22:11:55.591035', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:55.647105', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.2500491738319397, 'timestamp': '2025-09-30 22:11:55.648926', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:55.709346', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.22615186870098114, 'timestamp': '2025-09-30 22:11:55.715337', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.771297', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.15888667106628418, 'timestamp': '2025-09-30 22:11:55.775810', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.832605', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.23342831432819366, 'timestamp': '2025-09-30 22:11:55.834438', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:55.890947', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.23033948242664337, 'timestamp': '2025-09-30 22:11:55.893237', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:55.949201', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.1731342226266861, 'timestamp': '2025-09-30 22:11:55.955165', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.014082', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.22730785608291626, 'timestamp': '2025-09-30 22:11:56.016088', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:56.071557', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.2651865780353546, 'timestamp': '2025-09-30 22:11:56.073560', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.140911', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.15435177087783813, 'timestamp': '2025-09-30 22:11:56.142963', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:56.198955', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.24934111535549164, 'timestamp': '2025-09-30 22:11:56.204364', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.259379', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.2524150013923645, 'timestamp': '2025-09-30 22:11:56.261957', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:56.317550', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.24701440334320068, 'timestamp': '2025-09-30 22:11:56.319506', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:56.377507', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.2537510097026825, 'timestamp': '2025-09-30 22:11:56.379541', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:56.435254', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.20716746151447296, 'timestamp': '2025-09-30 22:11:56.440974', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:56.503828', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.13916124403476715, 'timestamp': '2025-09-30 22:11:56.505988', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:56.561963', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.21353694796562195, 'timestamp': '2025-09-30 22:11:56.563936', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:56.623890', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.2615525424480438, 'timestamp': '2025-09-30 22:11:56.626187', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:56.682090', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.16672831773757935, 'timestamp': '2025-09-30 22:11:56.687704', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.743206', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.2517176568508148, 'timestamp': '2025-09-30 22:11:56.745153', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.800897', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.15382523834705353, 'timestamp': '2025-09-30 22:11:56.802873', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:56.859640', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.1512170433998108, 'timestamp': '2025-09-30 22:11:56.864765', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:56.921212', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.20054449141025543, 'timestamp': '2025-09-30 22:11:56.929450', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:56.985385', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.14014515280723572, 'timestamp': '2025-09-30 22:11:56.987579', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:57.043429', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.2400130331516266, 'timestamp': '2025-09-30 22:11:57.045525', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:57.101092', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.14418958127498627, 'timestamp': '2025-09-30 22:11:57.103079', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:57.159132', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.17654521763324738, 'timestamp': '2025-09-30 22:11:57.164763', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:57.219418', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.18362335860729218, 'timestamp': '2025-09-30 22:11:57.223810', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:57.280578', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.21332359313964844, 'timestamp': '2025-09-30 22:11:57.282437', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.338237', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.19924978911876678, 'timestamp': '2025-09-30 22:11:57.340840', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:57.396964', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.1538708508014679, 'timestamp': '2025-09-30 22:11:57.406106', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.471463', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.21143244206905365, 'timestamp': '2025-09-30 22:11:57.474483', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.530335', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.18663176894187927, 'timestamp': '2025-09-30 22:11:57.536546', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.593336', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.21314330399036407, 'timestamp': '2025-09-30 22:11:57.598144', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.658775', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.19895565509796143, 'timestamp': '2025-09-30 22:11:57.667841', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:57.727682', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.20828227698802948, 'timestamp': '2025-09-30 22:11:57.729734', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.785690', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.16906701028347015, 'timestamp': '2025-09-30 22:11:57.788000', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:57.844887', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.214724600315094, 'timestamp': '2025-09-30 22:11:57.847304', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:57.902364', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.24892598390579224, 'timestamp': '2025-09-30 22:11:57.910814', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:57.966256', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.18823392689228058, 'timestamp': '2025-09-30 22:11:57.969423', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:58.028023', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.17969410121440887, 'timestamp': '2025-09-30 22:11:58.030991', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:11:58.088441', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.36191320419311523, 'timestamp': '2025-09-30 22:11:58.090879', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:58.152137', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.08993330597877502, 'timestamp': '2025-09-30 22:11:58.157424', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:58.216551', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.19067968428134918, 'timestamp': '2025-09-30 22:11:58.219124', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.275853', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.22519928216934204, 'timestamp': '2025-09-30 22:11:58.280306', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:58.340384', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.2141573578119278, 'timestamp': '2025-09-30 22:11:58.345822', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.407381', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.09782801568508148, 'timestamp': '2025-09-30 22:11:58.413038', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:58.472445', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.15874141454696655, 'timestamp': '2025-09-30 22:11:58.475211', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.532241', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.13980266451835632, 'timestamp': '2025-09-30 22:11:58.535246', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:58.591271', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.20028004050254822, 'timestamp': '2025-09-30 22:11:58.594350', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.651902', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.15554752945899963, 'timestamp': '2025-09-30 22:11:58.662874', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.718984', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.19115012884140015, 'timestamp': '2025-09-30 22:11:58.726262', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.784250', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.2429288774728775, 'timestamp': '2025-09-30 22:11:58.789456', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.845366', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.27936631441116333, 'timestamp': '2025-09-30 22:11:58.850270', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.906959', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.2241462916135788, 'timestamp': '2025-09-30 22:11:58.912882', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:58.967835', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.23516367375850677, 'timestamp': '2025-09-30 22:11:58.970346', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:59.028387', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.13646866381168365, 'timestamp': '2025-09-30 22:11:59.033253', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:59.094032', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.2098173201084137, 'timestamp': '2025-09-30 22:11:59.097445', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:59.154909', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.20390798151493073, 'timestamp': '2025-09-30 22:11:59.161368', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:59.219249', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.362056165933609, 'timestamp': '2025-09-30 22:11:59.222220', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:11:59.278760', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.26044219732284546, 'timestamp': '2025-09-30 22:11:59.282267', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:59.339128', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.18253199756145477, 'timestamp': '2025-09-30 22:11:59.342052', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:59.402519', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.19012662768363953, 'timestamp': '2025-09-30 22:11:59.409833', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:59.474651', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.18107323348522186, 'timestamp': '2025-09-30 22:11:59.478626', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:59.535956', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.18533870577812195, 'timestamp': '2025-09-30 22:11:59.539402', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:59.596120', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.18079029023647308, 'timestamp': '2025-09-30 22:11:59.603260', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:11:59.660712', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.16578859090805054, 'timestamp': '2025-09-30 22:11:59.671172', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:59.727146', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.16148200631141663, 'timestamp': '2025-09-30 22:11:59.729764', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:11:59.788701', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.3158484399318695, 'timestamp': '2025-09-30 22:11:59.791557', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:11:59.848116', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.19927377998828888, 'timestamp': '2025-09-30 22:11:59.852198', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:11:59.908573', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.21230587363243103, 'timestamp': '2025-09-30 22:11:59.915836', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:11:59.972540', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.18517597019672394, 'timestamp': '2025-09-30 22:11:59.975035', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:00.033220', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.19802626967430115, 'timestamp': '2025-09-30 22:12:00.036635', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.093961', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.18750356137752533, 'timestamp': '2025-09-30 22:12:00.096546', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.153298', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.15106911957263947, 'timestamp': '2025-09-30 22:12:00.159532', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.215305', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.2104482501745224, 'timestamp': '2025-09-30 22:12:00.218061', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:00.276557', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.21918952465057373, 'timestamp': '2025-09-30 22:12:00.279505', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.336436', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.1908463090658188, 'timestamp': '2025-09-30 22:12:00.340843', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:00.398451', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.13086000084877014, 'timestamp': '2025-09-30 22:12:00.405941', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.464891', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.20047049224376678, 'timestamp': '2025-09-30 22:12:00.467628', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:00.524969', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.25967246294021606, 'timestamp': '2025-09-30 22:12:00.527567', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:00.584966', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.18779341876506805, 'timestamp': '2025-09-30 22:12:00.589372', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:00.646681', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.1962081789970398, 'timestamp': '2025-09-30 22:12:00.655513', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:00.712204', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.19732949137687683, 'timestamp': '2025-09-30 22:12:00.717124', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:00.773456', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.2707637846469879, 'timestamp': '2025-09-30 22:12:00.776387', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:00.839056', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.26742541790008545, 'timestamp': '2025-09-30 22:12:00.841534', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:00.898473', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.1501181572675705, 'timestamp': '2025-09-30 22:12:00.904862', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:00.966843', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.2187950313091278, 'timestamp': '2025-09-30 22:12:00.969444', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:01.038215', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.1921483725309372, 'timestamp': '2025-09-30 22:12:01.045531', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.106830', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.24725928902626038, 'timestamp': '2025-09-30 22:12:01.110953', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.168539', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.10458771884441376, 'timestamp': '2025-09-30 22:12:01.178806', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:01.235477', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.2370263785123825, 'timestamp': '2025-09-30 22:12:01.238381', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:01.295573', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.15662895143032074, 'timestamp': '2025-09-30 22:12:01.300049', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.360851', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.2148972749710083, 'timestamp': '2025-09-30 22:12:01.372960', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:01.429313', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.2022460252046585, 'timestamp': '2025-09-30 22:12:01.437689', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:01.502571', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.22188451886177063, 'timestamp': '2025-09-30 22:12:01.510078', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.583291', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.21942368149757385, 'timestamp': '2025-09-30 22:12:01.595529', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:01.653455', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.2748608887195587, 'timestamp': '2025-09-30 22:12:01.655820', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.715349', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.19345656037330627, 'timestamp': '2025-09-30 22:12:01.721575', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:01.776925', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.15031586587429047, 'timestamp': '2025-09-30 22:12:01.779458', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.839447', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.1954568773508072, 'timestamp': '2025-09-30 22:12:01.841989', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:01.900090', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.19696274399757385, 'timestamp': '2025-09-30 22:12:01.902665', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:01.962630', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.14532142877578735, 'timestamp': '2025-09-30 22:12:01.969100', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:02.026400', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.23082798719406128, 'timestamp': '2025-09-30 22:12:02.029119', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.088129', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.17687079310417175, 'timestamp': '2025-09-30 22:12:02.091394', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.149471', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16662949323654175, 'timestamp': '2025-09-30 22:12:02.152510', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:02.209462', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.1856682151556015, 'timestamp': '2025-09-30 22:12:02.216515', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:02.273749', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.1403038650751114, 'timestamp': '2025-09-30 22:12:02.277203', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.339727', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.23096083104610443, 'timestamp': '2025-09-30 22:12:02.349377', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:02.411087', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.2159464955329895, 'timestamp': '2025-09-30 22:12:02.414112', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:02.471538', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.148705393075943, 'timestamp': '2025-09-30 22:12:02.481952', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.539159', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.12124668061733246, 'timestamp': '2025-09-30 22:12:02.542928', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:02.599654', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.20870791375637054, 'timestamp': '2025-09-30 22:12:02.606232', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:02.663529', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.21856123208999634, 'timestamp': '2025-09-30 22:12:02.669207', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:02.728393', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.09723266959190369, 'timestamp': '2025-09-30 22:12:02.735220', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.792570', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.2303348034620285, 'timestamp': '2025-09-30 22:12:02.795017', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.851998', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.24226313829421997, 'timestamp': '2025-09-30 22:12:02.855379', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.913394', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.14910516142845154, 'timestamp': '2025-09-30 22:12:02.916128', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:02.972863', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.16545231640338898, 'timestamp': '2025-09-30 22:12:02.978859', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.034551', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.17956823110580444, 'timestamp': '2025-09-30 22:12:03.037724', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:03.097766', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.1760360598564148, 'timestamp': '2025-09-30 22:12:03.100175', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.156871', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.22687560319900513, 'timestamp': '2025-09-30 22:12:03.160929', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:03.219422', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.15758097171783447, 'timestamp': '2025-09-30 22:12:03.225679', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.292942', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.19289617240428925, 'timestamp': '2025-09-30 22:12:03.295227', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:03.352127', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.24194438755512238, 'timestamp': '2025-09-30 22:12:03.360005', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.417774', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.29431501030921936, 'timestamp': '2025-09-30 22:12:03.420354', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:03.481674', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.19580687582492828, 'timestamp': '2025-09-30 22:12:03.488510', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:03.544769', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.17215746641159058, 'timestamp': '2025-09-30 22:12:03.553380', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.612057', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.14011698961257935, 'timestamp': '2025-09-30 22:12:03.615569', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:03.672295', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.15043015778064728, 'timestamp': '2025-09-30 22:12:03.679137', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:03.736219', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.11237704008817673, 'timestamp': '2025-09-30 22:12:03.743855', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.813563', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.16790269315242767, 'timestamp': '2025-09-30 22:12:03.816859', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:03.878210', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.16490621864795685, 'timestamp': '2025-09-30 22:12:03.881260', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:03.938329', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.21719390153884888, 'timestamp': '2025-09-30 22:12:03.941002', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:03.999362', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.18799471855163574, 'timestamp': '2025-09-30 22:12:04.007429', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:04.064406', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.16177502274513245, 'timestamp': '2025-09-30 22:12:04.066915', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:04.129263', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.21438507735729218, 'timestamp': '2025-09-30 22:12:04.133377', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:04.195189', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.19695338606834412, 'timestamp': '2025-09-30 22:12:04.202388', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:04.263196', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.219474196434021, 'timestamp': '2025-09-30 22:12:04.268828', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:04.326231', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.2121901959180832, 'timestamp': '2025-09-30 22:12:04.329194', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:04.388683', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.2705829441547394, 'timestamp': '2025-09-30 22:12:04.395039', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:04.452359', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.24034357070922852, 'timestamp': '2025-09-30 22:12:04.455111', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:04.514701', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.2636682093143463, 'timestamp': '2025-09-30 22:12:04.521056', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:04.583814', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.19239258766174316, 'timestamp': '2025-09-30 22:12:04.587548', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:04.644710', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.24020835757255554, 'timestamp': '2025-09-30 22:12:04.650634', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:04.710695', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.17362891137599945, 'timestamp': '2025-09-30 22:12:04.713588', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:04.770552', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.16982819139957428, 'timestamp': '2025-09-30 22:12:04.777925', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:04.838546', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.17167127132415771, 'timestamp': '2025-09-30 22:12:04.847862', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:04.913276', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.3086065649986267, 'timestamp': '2025-09-30 22:12:04.916570', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:04.973860', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.14641433954238892, 'timestamp': '2025-09-30 22:12:04.977901', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.035295', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.13150186836719513, 'timestamp': '2025-09-30 22:12:05.042714', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.103867', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.1373935043811798, 'timestamp': '2025-09-30 22:12:05.107144', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.166193', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.15361443161964417, 'timestamp': '2025-09-30 22:12:05.172270', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:05.235649', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.29508858919143677, 'timestamp': '2025-09-30 22:12:05.239297', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.296632', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.16856037080287933, 'timestamp': '2025-09-30 22:12:05.303862', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:05.359650', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.3016975522041321, 'timestamp': '2025-09-30 22:12:05.362769', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.418964', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.25484389066696167, 'timestamp': '2025-09-30 22:12:05.428645', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:05.492767', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.12782196700572968, 'timestamp': '2025-09-30 22:12:05.495866', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.558744', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.1921669840812683, 'timestamp': '2025-09-30 22:12:05.574335', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:05.630368', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.23118874430656433, 'timestamp': '2025-09-30 22:12:05.635049', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.692906', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.1899738758802414, 'timestamp': '2025-09-30 22:12:05.696266', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:05.754046', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.15608051419258118, 'timestamp': '2025-09-30 22:12:05.760538', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:05.817101', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.08864343911409378, 'timestamp': '2025-09-30 22:12:05.827882', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:05.886944', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.19454100728034973, 'timestamp': '2025-09-30 22:12:05.894140', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:05.955845', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.23882727324962616, 'timestamp': '2025-09-30 22:12:05.958845', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:06.016706', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.11389666795730591, 'timestamp': '2025-09-30 22:12:06.019655', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.081575', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.24640117585659027, 'timestamp': '2025-09-30 22:12:06.091340', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.148547', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.22861222922801971, 'timestamp': '2025-09-30 22:12:06.151191', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:06.206344', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.08908125013113022, 'timestamp': '2025-09-30 22:12:06.217972', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.274724', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.267127126455307, 'timestamp': '2025-09-30 22:12:06.278651', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:06.339927', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.26272013783454895, 'timestamp': '2025-09-30 22:12:06.347466', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:06.416348', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.138486847281456, 'timestamp': '2025-09-30 22:12:06.424372', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:06.481311', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.16281068325042725, 'timestamp': '2025-09-30 22:12:06.484294', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:06.542824', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.21891149878501892, 'timestamp': '2025-09-30 22:12:06.545916', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:06.603611', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.20321416854858398, 'timestamp': '2025-09-30 22:12:06.610074', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.673542', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.21077358722686768, 'timestamp': '2025-09-30 22:12:06.676646', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.734033', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.3105182945728302, 'timestamp': '2025-09-30 22:12:06.736482', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:06.798175', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.22638200223445892, 'timestamp': '2025-09-30 22:12:06.804349', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:06.862319', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.14126056432724, 'timestamp': '2025-09-30 22:12:06.869057', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:06.927367', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.1733216494321823, 'timestamp': '2025-09-30 22:12:06.932912', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:06.995444', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.3144453465938568, 'timestamp': '2025-09-30 22:12:06.998785', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:07.058888', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.21718546748161316, 'timestamp': '2025-09-30 22:12:07.062034', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.124377', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.17720946669578552, 'timestamp': '2025-09-30 22:12:07.137048', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.196793', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.16042743623256683, 'timestamp': '2025-09-30 22:12:07.199384', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:07.263089', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.28166723251342773, 'timestamp': '2025-09-30 22:12:07.270875', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:07.334479', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.1601470410823822, 'timestamp': '2025-09-30 22:12:07.337805', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.402803', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.15244218707084656, 'timestamp': '2025-09-30 22:12:07.409463', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:07.469204', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.13020753860473633, 'timestamp': '2025-09-30 22:12:07.476260', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:07.532659', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.13213253021240234, 'timestamp': '2025-09-30 22:12:07.536176', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:07.593814', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.24616125226020813, 'timestamp': '2025-09-30 22:12:07.596835', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.658556', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.2857804000377655, 'timestamp': '2025-09-30 22:12:07.665058', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.720308', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.19320528209209442, 'timestamp': '2025-09-30 22:12:07.726463', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:07.782957', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.15031787753105164, 'timestamp': '2025-09-30 22:12:07.786300', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:07.843831', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.23504918813705444, 'timestamp': '2025-09-30 22:12:07.848266', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:07.908204', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.17341046035289764, 'timestamp': '2025-09-30 22:12:07.914508', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:07.970676', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.12179598957300186, 'timestamp': '2025-09-30 22:12:07.973840', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.030858', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.21134288609027863, 'timestamp': '2025-09-30 22:12:08.034187', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:08.104037', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.2825859487056732, 'timestamp': '2025-09-30 22:12:08.107234', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:08.165998', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.18251150846481323, 'timestamp': '2025-09-30 22:12:08.172255', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.227853', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.21388526260852814, 'timestamp': '2025-09-30 22:12:08.237539', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.302423', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.19133110344409943, 'timestamp': '2025-09-30 22:12:08.306753', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.363117', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.24365024268627167, 'timestamp': '2025-09-30 22:12:08.367536', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:08.427306', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.19058401882648468, 'timestamp': '2025-09-30 22:12:08.434602', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:08.491515', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.1606425940990448, 'timestamp': '2025-09-30 22:12:08.494877', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.554191', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.1988113522529602, 'timestamp': '2025-09-30 22:12:08.558289', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:08.618822', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.20374299585819244, 'timestamp': '2025-09-30 22:12:08.622210', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.680244', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.19176843762397766, 'timestamp': '2025-09-30 22:12:08.687148', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:08.751048', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.2073064148426056, 'timestamp': '2025-09-30 22:12:08.754215', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:08.813405', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.17931288480758667, 'timestamp': '2025-09-30 22:12:08.817462', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:08.875412', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.18207938969135284, 'timestamp': '2025-09-30 22:12:08.879870', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:08.937600', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.17140445113182068, 'timestamp': '2025-09-30 22:12:08.944464', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:09.000221', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.2680283486843109, 'timestamp': '2025-09-30 22:12:09.003917', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:09.061370', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.18639126420021057, 'timestamp': '2025-09-30 22:12:09.065322', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.123693', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.15952971577644348, 'timestamp': '2025-09-30 22:12:09.127995', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.193055', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14535531401634216, 'timestamp': '2025-09-30 22:12:09.200313', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:09.257333', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.2548336088657379, 'timestamp': '2025-09-30 22:12:09.260477', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:09.318936', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.12835317850112915, 'timestamp': '2025-09-30 22:12:09.324361', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.381771', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.3145178556442261, 'timestamp': '2025-09-30 22:12:09.385520', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:09.444075', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.2335314154624939, 'timestamp': '2025-09-30 22:12:09.451411', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:09.507931', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.12509585916996002, 'timestamp': '2025-09-30 22:12:09.511293', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:09.568836', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14955422282218933, 'timestamp': '2025-09-30 22:12:09.573817', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.631350', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.3453223407268524, 'timestamp': '2025-09-30 22:12:09.634551', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.691748', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.17937330901622772, 'timestamp': '2025-09-30 22:12:09.698330', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:09.757766', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.14108096063137054, 'timestamp': '2025-09-30 22:12:09.771092', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:09.831816', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.14978133141994476, 'timestamp': '2025-09-30 22:12:09.836474', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:09.894526', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.1434394121170044, 'timestamp': '2025-09-30 22:12:09.899022', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:09.958614', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.1678590625524521, 'timestamp': '2025-09-30 22:12:09.965685', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.023786', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.1882156878709793, 'timestamp': '2025-09-30 22:12:10.027898', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:10.086130', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.21010273694992065, 'timestamp': '2025-09-30 22:12:10.089583', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:10.146576', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.31538674235343933, 'timestamp': '2025-09-30 22:12:10.150219', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.207646', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.20333518087863922, 'timestamp': '2025-09-30 22:12:10.215646', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:10.276961', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.1774730682373047, 'timestamp': '2025-09-30 22:12:10.288190', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.346066', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.13044632971286774, 'timestamp': '2025-09-30 22:12:10.348785', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.405940', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.22918885946273804, 'timestamp': '2025-09-30 22:12:10.409803', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.466770', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.2572970688343048, 'timestamp': '2025-09-30 22:12:10.475157', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:10.531048', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.1945604383945465, 'timestamp': '2025-09-30 22:12:10.541854', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:10.599498', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.2235056757926941, 'timestamp': '2025-09-30 22:12:10.602061', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.659553', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.18014207482337952, 'timestamp': '2025-09-30 22:12:10.666423', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:10.723981', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.2447717934846878, 'timestamp': '2025-09-30 22:12:10.732503', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.796193', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.15133507549762726, 'timestamp': '2025-09-30 22:12:10.800499', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:10.857787', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.16784603893756866, 'timestamp': '2025-09-30 22:12:10.860631', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:10.919279', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.24281814694404602, 'timestamp': '2025-09-30 22:12:10.924594', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:10.981895', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.15944667160511017, 'timestamp': '2025-09-30 22:12:10.996000', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:11.053945', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.18552806973457336, 'timestamp': '2025-09-30 22:12:11.057230', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:11.117892', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.20929870009422302, 'timestamp': '2025-09-30 22:12:11.121461', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:11.180275', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.16960929334163666, 'timestamp': '2025-09-30 22:12:11.183221', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:11.240985', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.1665547788143158, 'timestamp': '2025-09-30 22:12:11.247953', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:11.304762', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.3055075407028198, 'timestamp': '2025-09-30 22:12:11.309137', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:11.370067', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.21146436035633087, 'timestamp': '2025-09-30 22:12:11.378639', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:11.435313', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.13466477394104004, 'timestamp': '2025-09-30 22:12:11.437872', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:11.502829', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.18414711952209473, 'timestamp': '2025-09-30 22:12:11.517185', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:11.574917', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.17859964072704315, 'timestamp': '2025-09-30 22:12:11.579377', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:11.640436', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.08083513379096985, 'timestamp': '2025-09-30 22:12:11.647969', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:11.706814', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.2790297269821167, 'timestamp': '2025-09-30 22:12:11.709670', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:11.767556', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.1594020575284958, 'timestamp': '2025-09-30 22:12:11.777502', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:11.834958', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.19364415109157562, 'timestamp': '2025-09-30 22:12:11.839095', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:11.901963', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.2088746428489685, 'timestamp': '2025-09-30 22:12:11.910639', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:11.971751', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.19542336463928223, 'timestamp': '2025-09-30 22:12:11.974646', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:12.030402', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.24358536303043365, 'timestamp': '2025-09-30 22:12:12.036590', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:12.091764', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.14595745503902435, 'timestamp': '2025-09-30 22:12:12.094073', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:12.149925', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.2154374122619629, 'timestamp': '2025-09-30 22:12:12.156752', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:12.214883', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.10929782688617706, 'timestamp': '2025-09-30 22:12:12.217925', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:12.277655', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.16833485662937164, 'timestamp': '2025-09-30 22:12:12.292221', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:12.362461', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.21464209258556366, 'timestamp': '2025-09-30 22:12:12.365462', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:12.427040', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.283296674489975, 'timestamp': '2025-09-30 22:12:12.433662', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:12.494228', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.2113868147134781, 'timestamp': '2025-09-30 22:12:12.506895', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:12.566445', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.16828007996082306, 'timestamp': '2025-09-30 22:12:12.573592', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:12.637158', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.17851999402046204, 'timestamp': '2025-09-30 22:12:12.639648', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:12.695966', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.1546407788991928, 'timestamp': '2025-09-30 22:12:12.698802', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:12.756362', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.18888075649738312, 'timestamp': '2025-09-30 22:12:12.763064', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:12.821511', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.15767960250377655, 'timestamp': '2025-09-30 22:12:12.827722', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:12.883240', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.17233683168888092, 'timestamp': '2025-09-30 22:12:12.885570', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:12.943751', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.2565377354621887, 'timestamp': '2025-09-30 22:12:12.947051', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:13.003499', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.17855773866176605, 'timestamp': '2025-09-30 22:12:13.007186', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:13.063912', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.21859653294086456, 'timestamp': '2025-09-30 22:12:13.072482', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:13.130224', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.13855092227458954, 'timestamp': '2025-09-30 22:12:13.135357', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:13.194613', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.34564465284347534, 'timestamp': '2025-09-30 22:12:13.199405', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:13.256888', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.23947644233703613, 'timestamp': '2025-09-30 22:12:13.261278', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:13.323171', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.2786639928817749, 'timestamp': '2025-09-30 22:12:13.332559', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:13.389422', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.2451365739107132, 'timestamp': '2025-09-30 22:12:13.391879', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:13.448549', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.1137644425034523, 'timestamp': '2025-09-30 22:12:13.451142', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:13.507824', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.22169752418994904, 'timestamp': '2025-09-30 22:12:13.511497', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:13.573185', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.15987257659435272, 'timestamp': '2025-09-30 22:12:13.583408', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:13.639664', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.1310587376356125, 'timestamp': '2025-09-30 22:12:13.643189', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:13.700102', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.2008838802576065, 'timestamp': '2025-09-30 22:12:13.702459', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:13.761204', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.1205814927816391, 'timestamp': '2025-09-30 22:12:13.763557', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:13.825003', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.21685290336608887, 'timestamp': '2025-09-30 22:12:13.837548', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:13.893547', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.16454990208148956, 'timestamp': '2025-09-30 22:12:13.904311', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:13.963607', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.1714007705450058, 'timestamp': '2025-09-30 22:12:13.970031', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:14.030413', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.22813524305820465, 'timestamp': '2025-09-30 22:12:14.034019', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:14.090735', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.3519565761089325, 'timestamp': '2025-09-30 22:12:14.097789', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:14.155196', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.14383260905742645, 'timestamp': '2025-09-30 22:12:14.158901', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:14.217428', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.1543911099433899, 'timestamp': '2025-09-30 22:12:14.222167', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:14.279617', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.18572334945201874, 'timestamp': '2025-09-30 22:12:14.283505', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:14.341613', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.2207956165075302, 'timestamp': '2025-09-30 22:12:14.348885', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:14.406245', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.17238770425319672, 'timestamp': '2025-09-30 22:12:14.408952', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:14.466477', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.2218809276819229, 'timestamp': '2025-09-30 22:12:14.470307', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:14.528449', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.1464463770389557, 'timestamp': '2025-09-30 22:12:14.532232', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:14.592955', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.19648714363574982, 'timestamp': '2025-09-30 22:12:14.598861', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:14.655704', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.2803334891796112, 'timestamp': '2025-09-30 22:12:14.659925', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:14.719363', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.18118534982204437, 'timestamp': '2025-09-30 22:12:14.723970', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:14.786661', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.21979981660842896, 'timestamp': '2025-09-30 22:12:14.789928', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:14.846946', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.21556292474269867, 'timestamp': '2025-09-30 22:12:14.855464', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:14.922632', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.14268945157527924, 'timestamp': '2025-09-30 22:12:14.933810', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:14.990829', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.11001583933830261, 'timestamp': '2025-09-30 22:12:15.000745', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:12:15.058728', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.2407507598400116, 'timestamp': '2025-09-30 22:12:15.061196', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:15.125437', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.1869174838066101, 'timestamp': '2025-09-30 22:12:15.132379', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:15.188637', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.294333279132843, 'timestamp': '2025-09-30 22:12:15.191894', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.248354', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.17190970480442047, 'timestamp': '2025-09-30 22:12:15.251632', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.309420', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.3215515911579132, 'timestamp': '2025-09-30 22:12:15.312740', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.369224', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.13677817583084106, 'timestamp': '2025-09-30 22:12:15.376040', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.431680', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.2618434727191925, 'timestamp': '2025-09-30 22:12:15.434967', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.493062', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.16722552478313446, 'timestamp': '2025-09-30 22:12:15.496098', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:15.553157', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.19913063943386078, 'timestamp': '2025-09-30 22:12:15.560088', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:15.616619', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.26987987756729126, 'timestamp': '2025-09-30 22:12:15.622980', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:15.680744', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.13895970582962036, 'timestamp': '2025-09-30 22:12:15.692021', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:15.750904', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.25575563311576843, 'timestamp': '2025-09-30 22:12:15.761373', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:15.819529', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.2878929674625397, 'timestamp': '2025-09-30 22:12:15.823336', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:15.886957', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.16302500665187836, 'timestamp': '2025-09-30 22:12:15.893842', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:15.949808', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.14996309578418732, 'timestamp': '2025-09-30 22:12:15.952526', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:16.008208', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.18102094531059265, 'timestamp': '2025-09-30 22:12:16.024456', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:16.082384', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.1703808456659317, 'timestamp': '2025-09-30 22:12:16.084753', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:16.140718', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.3236149847507477, 'timestamp': '2025-09-30 22:12:16.147704', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:12:29.753225', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 9077.70475427053, 'timestamp': '2025-09-30 22:12:29.764762', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:29.822802', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.11846984922885895, 'timestamp': '2025-09-30 22:12:29.825524', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:29.888153', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.13741722702980042, 'timestamp': '2025-09-30 22:12:29.896447', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:29.953215', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.2683463394641876, 'timestamp': '2025-09-30 22:12:29.964639', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.021539', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.14088799059391022, 'timestamp': '2025-09-30 22:12:30.031829', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.086889', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.23869071900844574, 'timestamp': '2025-09-30 22:12:30.089774', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:30.150114', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.29640087485313416, 'timestamp': '2025-09-30 22:12:30.153481', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:30.213855', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.23396742343902588, 'timestamp': '2025-09-30 22:12:30.216917', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.282016', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.23035909235477448, 'timestamp': '2025-09-30 22:12:30.288251', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.350045', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.1525399386882782, 'timestamp': '2025-09-30 22:12:30.354760', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.410615', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.23942089080810547, 'timestamp': '2025-09-30 22:12:30.415563', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.475532', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.22143802046775818, 'timestamp': '2025-09-30 22:12:30.482560', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.543827', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.1341884732246399, 'timestamp': '2025-09-30 22:12:30.551360', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.611419', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.19387735426425934, 'timestamp': '2025-09-30 22:12:30.615656', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.677320', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.11443384736776352, 'timestamp': '2025-09-30 22:12:30.680393', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.737890', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.2335507869720459, 'timestamp': '2025-09-30 22:12:30.741237', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:30.807299', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.2292100489139557, 'timestamp': '2025-09-30 22:12:30.813311', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:30.877750', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.21403740346431732, 'timestamp': '2025-09-30 22:12:30.881626', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:30.942823', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.1699080765247345, 'timestamp': '2025-09-30 22:12:30.957243', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:31.015001', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.17974725365638733, 'timestamp': '2025-09-30 22:12:31.018009', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:31.074152', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.19602970778942108, 'timestamp': '2025-09-30 22:12:31.080621', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.140936', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.11100554466247559, 'timestamp': '2025-09-30 22:12:31.144288', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:31.203178', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.13254953920841217, 'timestamp': '2025-09-30 22:12:31.205854', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.261771', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.1285274624824524, 'timestamp': '2025-09-30 22:12:31.264626', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.325351', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12240595370531082, 'timestamp': '2025-09-30 22:12:31.332905', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.389121', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.21916446089744568, 'timestamp': '2025-09-30 22:12:31.391379', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:31.448964', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.16983330249786377, 'timestamp': '2025-09-30 22:12:31.454021', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.511207', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.24157916009426117, 'timestamp': '2025-09-30 22:12:31.514953', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.577890', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.17395064234733582, 'timestamp': '2025-09-30 22:12:31.586515', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.643385', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.17047491669654846, 'timestamp': '2025-09-30 22:12:31.646665', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:31.703102', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.18591102957725525, 'timestamp': '2025-09-30 22:12:31.706861', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.765079', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.19581495225429535, 'timestamp': '2025-09-30 22:12:31.768880', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.827191', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.2727483808994293, 'timestamp': '2025-09-30 22:12:31.833905', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:31.889508', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.20721817016601562, 'timestamp': '2025-09-30 22:12:31.892666', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:31.951544', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.24920345842838287, 'timestamp': '2025-09-30 22:12:31.954912', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:32.012873', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.20121552050113678, 'timestamp': '2025-09-30 22:12:32.015573', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:32.074189', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.258103609085083, 'timestamp': '2025-09-30 22:12:32.080957', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:32.136799', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.17533917725086212, 'timestamp': '2025-09-30 22:12:32.140451', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:32.198172', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.27276408672332764, 'timestamp': '2025-09-30 22:12:32.203034', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:32.260736', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.15920701622962952, 'timestamp': '2025-09-30 22:12:32.264981', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:32.322413', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.2549150884151459, 'timestamp': '2025-09-30 22:12:32.328873', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:32.384871', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.18176935613155365, 'timestamp': '2025-09-30 22:12:32.392294', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:32.449685', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.15627916157245636, 'timestamp': '2025-09-30 22:12:32.453888', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:32.509962', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.23353002965450287, 'timestamp': '2025-09-30 22:12:32.515948', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:32.584519', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.17458437383174896, 'timestamp': '2025-09-30 22:12:32.590540', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:32.647056', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.17846310138702393, 'timestamp': '2025-09-30 22:12:32.650251', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:32.708534', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.2166609913110733, 'timestamp': '2025-09-30 22:12:32.712241', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:32.776538', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.26427173614501953, 'timestamp': '2025-09-30 22:12:32.780015', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:32.836880', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.19966134428977966, 'timestamp': '2025-09-30 22:12:32.844345', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:32.902783', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.1445484310388565, 'timestamp': '2025-09-30 22:12:32.906029', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:32.968181', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.14057762920856476, 'timestamp': '2025-09-30 22:12:32.970855', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:33.027263', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13063742220401764, 'timestamp': '2025-09-30 22:12:33.029870', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:33.087627', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.3498128056526184, 'timestamp': '2025-09-30 22:12:33.095545', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:33.150537', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.1723112165927887, 'timestamp': '2025-09-30 22:12:33.159286', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.216145', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.085210882127285, 'timestamp': '2025-09-30 22:12:33.219569', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:33.283169', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.15476788580417633, 'timestamp': '2025-09-30 22:12:33.289940', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:33.348626', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.29521214962005615, 'timestamp': '2025-09-30 22:12:33.355576', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.411587', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.2543710768222809, 'timestamp': '2025-09-30 22:12:33.414515', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.471735', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.16321446001529694, 'timestamp': '2025-09-30 22:12:33.475166', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.531272', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.12496593594551086, 'timestamp': '2025-09-30 22:12:33.533572', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.589309', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.19127315282821655, 'timestamp': '2025-09-30 22:12:33.596097', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:33.654449', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.1900244653224945, 'timestamp': '2025-09-30 22:12:33.657800', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:33.721321', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.23376049101352692, 'timestamp': '2025-09-30 22:12:33.724571', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:33.797132', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.16662847995758057, 'timestamp': '2025-09-30 22:12:33.805451', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:33.862285', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.2179681360721588, 'timestamp': '2025-09-30 22:12:33.870544', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:33.934305', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17372050881385803, 'timestamp': '2025-09-30 22:12:33.937763', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:33.998280', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.20690995454788208, 'timestamp': '2025-09-30 22:12:34.002315', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:34.063482', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.1886254847049713, 'timestamp': '2025-09-30 22:12:34.068122', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:34.124703', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.13535556197166443, 'timestamp': '2025-09-30 22:12:34.130705', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-30 22:12:34.526730', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:34.587507', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.21139927208423615, 'timestamp': '2025-09-30 22:12:34.590829', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:34.648057', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.21654145419597626, 'timestamp': '2025-09-30 22:12:34.652027', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:34.708979', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.29667091369628906, 'timestamp': '2025-09-30 22:12:34.716941', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:34.772522', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.19114530086517334, 'timestamp': '2025-09-30 22:12:34.781536', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:34.836803', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.20732298493385315, 'timestamp': '2025-09-30 22:12:34.839222', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:34.895896', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.17717839777469635, 'timestamp': '2025-09-30 22:12:34.898754', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:34.955004', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.20587415993213654, 'timestamp': '2025-09-30 22:12:34.958137', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:35.014157', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.14703774452209473, 'timestamp': '2025-09-30 22:12:35.021538', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:35.077526', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.23215270042419434, 'timestamp': '2025-09-30 22:12:35.080845', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.137362', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.12509052455425262, 'timestamp': '2025-09-30 22:12:35.140762', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:35.198851', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.21703900396823883, 'timestamp': '2025-09-30 22:12:35.202799', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:35.263255', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.18544670939445496, 'timestamp': '2025-09-30 22:12:35.270638', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:35.328078', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.20759324729442596, 'timestamp': '2025-09-30 22:12:35.335735', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:35.398345', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.20425602793693542, 'timestamp': '2025-09-30 22:12:35.401223', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:35.462467', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.14097775518894196, 'timestamp': '2025-09-30 22:12:35.465340', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.521373', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.30060723423957825, 'timestamp': '2025-09-30 22:12:35.529525', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.588471', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.15493619441986084, 'timestamp': '2025-09-30 22:12:35.598300', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:35.657033', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.12828083336353302, 'timestamp': '2025-09-30 22:12:35.661934', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:35.719245', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.15368564426898956, 'timestamp': '2025-09-30 22:12:35.722713', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.780761', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.2056015282869339, 'timestamp': '2025-09-30 22:12:35.789971', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.845854', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.1887482851743698, 'timestamp': '2025-09-30 22:12:35.849185', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:35.905904', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2784852981567383, 'timestamp': '2025-09-30 22:12:35.911016', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:35.968924', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.1884661614894867, 'timestamp': '2025-09-30 22:12:35.973455', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.034777', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.17186197638511658, 'timestamp': '2025-09-30 22:12:36.043832', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:36.103846', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.12578655779361725, 'timestamp': '2025-09-30 22:12:36.106901', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:36.162742', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.22758962213993073, 'timestamp': '2025-09-30 22:12:36.167052', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.224044', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.15492264926433563, 'timestamp': '2025-09-30 22:12:36.227272', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.284626', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.26175883412361145, 'timestamp': '2025-09-30 22:12:36.292313', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.348913', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.25519752502441406, 'timestamp': '2025-09-30 22:12:36.351466', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:36.408129', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.1537482887506485, 'timestamp': '2025-09-30 22:12:36.411764', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:36.473507', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.31994062662124634, 'timestamp': '2025-09-30 22:12:36.477045', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.535352', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.20861245691776276, 'timestamp': '2025-09-30 22:12:36.543017', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:36.600900', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.2741034924983978, 'timestamp': '2025-09-30 22:12:36.603389', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.659908', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.147526353597641, 'timestamp': '2025-09-30 22:12:36.662805', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:36.719080', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.3032955229282379, 'timestamp': '2025-09-30 22:12:36.722912', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:36.779888', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.25694799423217773, 'timestamp': '2025-09-30 22:12:36.786542', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:36.841636', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.2189422845840454, 'timestamp': '2025-09-30 22:12:36.850508', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:36.911601', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.16431483626365662, 'timestamp': '2025-09-30 22:12:36.915918', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:36.972736', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12896884977817535, 'timestamp': '2025-09-30 22:12:36.975324', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:37.042398', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.20621570944786072, 'timestamp': '2025-09-30 22:12:37.052526', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:37.108066', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.24048185348510742, 'timestamp': '2025-09-30 22:12:37.112870', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:37.169272', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.2472948133945465, 'timestamp': '2025-09-30 22:12:37.172749', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:37.234445', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.16131865978240967, 'timestamp': '2025-09-30 22:12:37.241268', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:37.296882', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22360414266586304, 'timestamp': '2025-09-30 22:12:37.302975', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:37.358134', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.14269280433654785, 'timestamp': '2025-09-30 22:12:37.360854', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:37.417029', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.1504736691713333, 'timestamp': '2025-09-30 22:12:37.420456', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:37.476357', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.21397347748279572, 'timestamp': '2025-09-30 22:12:37.481927', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:37.539418', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.23679964244365692, 'timestamp': '2025-09-30 22:12:37.545364', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:37.603883', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.24395357072353363, 'timestamp': '2025-09-30 22:12:37.608188', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:37.663740', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.17886687815189362, 'timestamp': '2025-09-30 22:12:37.666814', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:37.725612', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.1289771944284439, 'timestamp': '2025-09-30 22:12:37.730523', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:37.786534', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.15374600887298584, 'timestamp': '2025-09-30 22:12:37.792537', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:37.853680', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.1852901577949524, 'timestamp': '2025-09-30 22:12:37.857545', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:37.931231', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.16049624979496002, 'timestamp': '2025-09-30 22:12:37.934375', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:37.990417', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.12764306366443634, 'timestamp': '2025-09-30 22:12:37.993939', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:38.051980', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.21465782821178436, 'timestamp': '2025-09-30 22:12:38.058639', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:38.114912', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.2843315005302429, 'timestamp': '2025-09-30 22:12:38.121651', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:38.178878', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.17361558973789215, 'timestamp': '2025-09-30 22:12:38.181521', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.237552', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.21387211978435516, 'timestamp': '2025-09-30 22:12:38.240814', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:38.298015', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.26288414001464844, 'timestamp': '2025-09-30 22:12:38.306823', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.369168', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.3286800980567932, 'timestamp': '2025-09-30 22:12:38.372898', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.435045', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.14837238192558289, 'timestamp': '2025-09-30 22:12:38.445964', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:38.502589', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.20727340877056122, 'timestamp': '2025-09-30 22:12:38.505637', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.562199', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.17245903611183167, 'timestamp': '2025-09-30 22:12:38.568963', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:38.625504', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.19849038124084473, 'timestamp': '2025-09-30 22:12:38.628459', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:38.684070', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.20391219854354858, 'timestamp': '2025-09-30 22:12:38.688386', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:38.746129', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.23917658627033234, 'timestamp': '2025-09-30 22:12:38.748921', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.812507', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.17133764922618866, 'timestamp': '2025-09-30 22:12:38.822505', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.883354', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.1554572880268097, 'timestamp': '2025-09-30 22:12:38.888559', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:38.944964', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.20179483294487, 'timestamp': '2025-09-30 22:12:38.948008', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.004552', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.2154114544391632, 'timestamp': '2025-09-30 22:12:39.012974', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:39.072472', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.21038609743118286, 'timestamp': '2025-09-30 22:12:39.079897', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.137428', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.1869608610868454, 'timestamp': '2025-09-30 22:12:39.140102', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.198025', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.19968487322330475, 'timestamp': '2025-09-30 22:12:39.201406', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:39.258515', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.171259805560112, 'timestamp': '2025-09-30 22:12:39.262187', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.319631', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.1316150575876236, 'timestamp': '2025-09-30 22:12:39.326340', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.381869', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.24486522376537323, 'timestamp': '2025-09-30 22:12:39.389329', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.445013', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.1737489402294159, 'timestamp': '2025-09-30 22:12:39.450307', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:39.509326', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.12531431019306183, 'timestamp': '2025-09-30 22:12:39.512048', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:39.569608', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.2566583752632141, 'timestamp': '2025-09-30 22:12:39.576796', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:39.631611', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.14845030009746552, 'timestamp': '2025-09-30 22:12:39.637959', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:39.693384', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.13657638430595398, 'timestamp': '2025-09-30 22:12:39.695920', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:39.752842', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.2763364017009735, 'timestamp': '2025-09-30 22:12:39.755511', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:39.812372', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.0969676747918129, 'timestamp': '2025-09-30 22:12:39.824377', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:39.879805', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.23177525401115417, 'timestamp': '2025-09-30 22:12:39.882602', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:39.943740', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.18873770534992218, 'timestamp': '2025-09-30 22:12:39.947180', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:40.009957', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.18430837988853455, 'timestamp': '2025-09-30 22:12:40.012316', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:40.068198', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.23545309901237488, 'timestamp': '2025-09-30 22:12:40.074800', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:40.130196', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.11601437628269196, 'timestamp': '2025-09-30 22:12:40.133143', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:40.195600', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.15076856315135956, 'timestamp': '2025-09-30 22:12:40.198287', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:40.257506', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.12586930394172668, 'timestamp': '2025-09-30 22:12:40.265297', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:40.320930', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.16196775436401367, 'timestamp': '2025-09-30 22:12:40.329975', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:40.385025', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.17412927746772766, 'timestamp': '2025-09-30 22:12:40.388365', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:40.444678', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.14433453977108002, 'timestamp': '2025-09-30 22:12:40.448255', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:40.504160', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.19770851731300354, 'timestamp': '2025-09-30 22:12:40.507807', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:40.570617', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.16478586196899414, 'timestamp': '2025-09-30 22:12:40.581083', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:40.650506', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.23873360455036163, 'timestamp': '2025-09-30 22:12:40.657017', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:40.719313', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.21043922007083893, 'timestamp': '2025-09-30 22:12:40.722090', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:40.793586', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.18372422456741333, 'timestamp': '2025-09-30 22:12:40.796398', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:40.853296', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.1484333574771881, 'timestamp': '2025-09-30 22:12:40.861424', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:40.916480', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.11905190348625183, 'timestamp': '2025-09-30 22:12:40.919658', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:40.982600', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.14147280156612396, 'timestamp': '2025-09-30 22:12:40.986210', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:41.047734', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.18773658573627472, 'timestamp': '2025-09-30 22:12:41.055810', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:41.115804', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.13631607592105865, 'timestamp': '2025-09-30 22:12:41.127482', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:41.185589', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.25866761803627014, 'timestamp': '2025-09-30 22:12:41.187960', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:41.242832', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.16792814433574677, 'timestamp': '2025-09-30 22:12:41.246778', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:41.305844', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.21183863282203674, 'timestamp': '2025-09-30 22:12:41.308343', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:41.367984', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.23247472941875458, 'timestamp': '2025-09-30 22:12:41.374546', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:41.429298', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.1626669317483902, 'timestamp': '2025-09-30 22:12:41.432012', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:41.488181', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.16574375331401825, 'timestamp': '2025-09-30 22:12:41.491315', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:41.551859', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.15963545441627502, 'timestamp': '2025-09-30 22:12:41.556221', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:41.623237', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.23258939385414124, 'timestamp': '2025-09-30 22:12:41.629431', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:41.685858', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.19910046458244324, 'timestamp': '2025-09-30 22:12:41.688634', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:41.744588', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.158043771982193, 'timestamp': '2025-09-30 22:12:41.747274', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:41.806967', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.13319414854049683, 'timestamp': '2025-09-30 22:12:41.810133', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:41.867672', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.15500923991203308, 'timestamp': '2025-09-30 22:12:41.873875', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:41.930074', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.2099320888519287, 'timestamp': '2025-09-30 22:12:41.933873', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:41.993052', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.23607559502124786, 'timestamp': '2025-09-30 22:12:41.997270', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:42.054027', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.1939982920885086, 'timestamp': '2025-09-30 22:12:42.058082', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:42.122617', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.1697155088186264, 'timestamp': '2025-09-30 22:12:42.129380', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:42.185629', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.24427513778209686, 'timestamp': '2025-09-30 22:12:42.190228', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:42.250543', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.21851640939712524, 'timestamp': '2025-09-30 22:12:42.254560', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:42.315974', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.163552388548851, 'timestamp': '2025-09-30 22:12:42.318772', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:42.375044', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.19823020696640015, 'timestamp': '2025-09-30 22:12:42.381360', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:42.443496', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.16150455176830292, 'timestamp': '2025-09-30 22:12:42.445747', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:42.504069', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.13832247257232666, 'timestamp': '2025-09-30 22:12:42.507527', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:42.564929', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.21933721005916595, 'timestamp': '2025-09-30 22:12:42.567668', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:42.625044', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.2251024693250656, 'timestamp': '2025-09-30 22:12:42.637394', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:42.692733', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.12876681983470917, 'timestamp': '2025-09-30 22:12:42.696138', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:42.753522', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.19521337747573853, 'timestamp': '2025-09-30 22:12:42.757084', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:42.819581', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.2649197280406952, 'timestamp': '2025-09-30 22:12:42.822557', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:42.891036', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.14959783852100372, 'timestamp': '2025-09-30 22:12:42.898790', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:42.957364', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.25701355934143066, 'timestamp': '2025-09-30 22:12:42.964780', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:43.021451', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.15662957727909088, 'timestamp': '2025-09-30 22:12:43.023878', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:43.080599', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.34929394721984863, 'timestamp': '2025-09-30 22:12:43.083106', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:43.139213', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.2702282667160034, 'timestamp': '2025-09-30 22:12:43.145361', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:43.202295', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.1777520626783371, 'timestamp': '2025-09-30 22:12:43.208126', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:43.264211', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.17591029405593872, 'timestamp': '2025-09-30 22:12:43.266635', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:43.324387', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.17949222028255463, 'timestamp': '2025-09-30 22:12:43.333208', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:43.388904', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.10636331140995026, 'timestamp': '2025-09-30 22:12:43.395006', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:43.456893', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.1831047534942627, 'timestamp': '2025-09-30 22:12:43.460807', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:43.517271', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.24230416119098663, 'timestamp': '2025-09-30 22:12:43.520070', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:43.576966', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.11143571138381958, 'timestamp': '2025-09-30 22:12:43.579454', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:43.638062', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.23011863231658936, 'timestamp': '2025-09-30 22:12:43.645505', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:43.701885', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.17789649963378906, 'timestamp': '2025-09-30 22:12:43.704455', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:43.760392', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.13040903210639954, 'timestamp': '2025-09-30 22:12:43.765433', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:43.823158', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.19516348838806152, 'timestamp': '2025-09-30 22:12:43.828453', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:43.886145', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.2865290343761444, 'timestamp': '2025-09-30 22:12:43.892754', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:43.948187', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.26813653111457825, 'timestamp': '2025-09-30 22:12:43.951350', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.010174', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.2616274058818817, 'timestamp': '2025-09-30 22:12:44.020756', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:44.078909', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.11517631262540817, 'timestamp': '2025-09-30 22:12:44.083139', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:44.140395', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.20000825822353363, 'timestamp': '2025-09-30 22:12:44.149751', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.206129', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.15703444182872772, 'timestamp': '2025-09-30 22:12:44.208510', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.263567', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.14223612844944, 'timestamp': '2025-09-30 22:12:44.266158', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.321934', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.16661377251148224, 'timestamp': '2025-09-30 22:12:44.324162', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:12:44.380769', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.21845144033432007, 'timestamp': '2025-09-30 22:12:44.387485', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:44.448558', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.2238670140504837, 'timestamp': '2025-09-30 22:12:44.451393', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:44.509445', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.2216857522726059, 'timestamp': '2025-09-30 22:12:44.515629', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:44.577092', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.1731707900762558, 'timestamp': '2025-09-30 22:12:44.579779', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:44.636589', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.16666533052921295, 'timestamp': '2025-09-30 22:12:44.643204', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.709744', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.13325437903404236, 'timestamp': '2025-09-30 22:12:44.713596', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:44.777605', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.26186221837997437, 'timestamp': '2025-09-30 22:12:44.781256', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:44.838572', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.19267579913139343, 'timestamp': '2025-09-30 22:12:44.843987', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:44.901013', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.12224666029214859, 'timestamp': '2025-09-30 22:12:44.915115', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:44.971259', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.14743512868881226, 'timestamp': '2025-09-30 22:12:44.979437', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.036902', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.1509329378604889, 'timestamp': '2025-09-30 22:12:45.043524', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.100517', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.17634770274162292, 'timestamp': '2025-09-30 22:12:45.103407', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:45.159979', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.15290920436382294, 'timestamp': '2025-09-30 22:12:45.166213', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.222222', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.19052289426326752, 'timestamp': '2025-09-30 22:12:45.224918', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.283509', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.1908445954322815, 'timestamp': '2025-09-30 22:12:45.292898', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:45.356676', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.15620876848697662, 'timestamp': '2025-09-30 22:12:45.360205', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:45.417449', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.1862248182296753, 'timestamp': '2025-09-30 22:12:45.423946', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:45.488982', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.16067323088645935, 'timestamp': '2025-09-30 22:12:45.494894', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:45.556898', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.18388037383556366, 'timestamp': '2025-09-30 22:12:45.560629', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.619512', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.15656912326812744, 'timestamp': '2025-09-30 22:12:45.623355', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:45.694579', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.22918155789375305, 'timestamp': '2025-09-30 22:12:45.705669', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:45.777129', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.1781524419784546, 'timestamp': '2025-09-30 22:12:45.781403', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:45.838323', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.2583979666233063, 'timestamp': '2025-09-30 22:12:45.842045', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.912023', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.13603459298610687, 'timestamp': '2025-09-30 22:12:45.924590', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:45.987957', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.20950353145599365, 'timestamp': '2025-09-30 22:12:45.994669', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:46.055622', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.20746538043022156, 'timestamp': '2025-09-30 22:12:46.058043', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:46.127353', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.17535962164402008, 'timestamp': '2025-09-30 22:12:46.130201', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.187935', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.1514834761619568, 'timestamp': '2025-09-30 22:12:46.190619', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:46.246998', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.1232081949710846, 'timestamp': '2025-09-30 22:12:46.253095', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:46.308880', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.14321914315223694, 'timestamp': '2025-09-30 22:12:46.317056', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.377793', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.18292191624641418, 'timestamp': '2025-09-30 22:12:46.386065', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:46.442043', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.2539004385471344, 'timestamp': '2025-09-30 22:12:46.445758', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.511593', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.19087259471416473, 'timestamp': '2025-09-30 22:12:46.519220', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.586732', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.13526085019111633, 'timestamp': '2025-09-30 22:12:46.591772', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:46.651740', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.16062860190868378, 'timestamp': '2025-09-30 22:12:46.658459', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:46.722451', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.2038429081439972, 'timestamp': '2025-09-30 22:12:46.725872', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:46.783324', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09787719696760178, 'timestamp': '2025-09-30 22:12:46.790543', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.846419', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.1658685803413391, 'timestamp': '2025-09-30 22:12:46.856750', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.914734', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.23814471065998077, 'timestamp': '2025-09-30 22:12:46.918505', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:46.976006', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.19641417264938354, 'timestamp': '2025-09-30 22:12:46.979034', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.044199', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.19846650958061218, 'timestamp': '2025-09-30 22:12:47.054630', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.114089', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.24107739329338074, 'timestamp': '2025-09-30 22:12:47.121543', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.190161', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.14445875585079193, 'timestamp': '2025-09-30 22:12:47.193380', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.251928', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.1689179688692093, 'timestamp': '2025-09-30 22:12:47.255952', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:47.314323', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.12283997237682343, 'timestamp': '2025-09-30 22:12:47.321518', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.377549', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.157147616147995, 'timestamp': '2025-09-30 22:12:47.380535', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:47.439070', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.1787220686674118, 'timestamp': '2025-09-30 22:12:47.443412', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:47.500433', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.1945653110742569, 'timestamp': '2025-09-30 22:12:47.503205', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:47.563967', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.19495917856693268, 'timestamp': '2025-09-30 22:12:47.571312', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.627735', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.15425997972488403, 'timestamp': '2025-09-30 22:12:47.631879', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:47.692189', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.2272043526172638, 'timestamp': '2025-09-30 22:12:47.695650', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.759508', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.1785009354352951, 'timestamp': '2025-09-30 22:12:47.764868', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:47.825286', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.17700564861297607, 'timestamp': '2025-09-30 22:12:47.831473', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:47.897518', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.10677989572286606, 'timestamp': '2025-09-30 22:12:47.901406', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:47.967417', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22687864303588867, 'timestamp': '2025-09-30 22:12:47.972213', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.034082', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.15429522097110748, 'timestamp': '2025-09-30 22:12:48.043628', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:12:48.102932', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.20424628257751465, 'timestamp': '2025-09-30 22:12:48.109449', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:48.169064', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.14845514297485352, 'timestamp': '2025-09-30 22:12:48.171340', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:48.236891', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.13638605177402496, 'timestamp': '2025-09-30 22:12:48.239947', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.296460', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.15425236523151398, 'timestamp': '2025-09-30 22:12:48.300630', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.356912', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.13553397357463837, 'timestamp': '2025-09-30 22:12:48.363034', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.417886', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.17596150934696198, 'timestamp': '2025-09-30 22:12:48.420450', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.476096', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.1436741054058075, 'timestamp': '2025-09-30 22:12:48.480450', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:48.562739', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.20372943580150604, 'timestamp': '2025-09-30 22:12:48.569411', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:48.629428', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.14825472235679626, 'timestamp': '2025-09-30 22:12:48.636915', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:48.706865', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.1872871369123459, 'timestamp': '2025-09-30 22:12:48.709867', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:48.766898', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.17796854674816132, 'timestamp': '2025-09-30 22:12:48.781525', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.839010', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.18515750765800476, 'timestamp': '2025-09-30 22:12:48.843276', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:48.898942', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.15142259001731873, 'timestamp': '2025-09-30 22:12:48.906012', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:48.973653', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.26955902576446533, 'timestamp': '2025-09-30 22:12:48.981249', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.050338', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.2962636649608612, 'timestamp': '2025-09-30 22:12:49.053719', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:49.114965', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.24111080169677734, 'timestamp': '2025-09-30 22:12:49.121210', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:49.177129', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.1308453530073166, 'timestamp': '2025-09-30 22:12:49.183416', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.239477', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.20053847134113312, 'timestamp': '2025-09-30 22:12:49.242025', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:49.298761', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.305469274520874, 'timestamp': '2025-09-30 22:12:49.305109', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:49.366411', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.23664188385009766, 'timestamp': '2025-09-30 22:12:49.369912', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:49.427653', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.18991225957870483, 'timestamp': '2025-09-30 22:12:49.435134', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.495913', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.17484256625175476, 'timestamp': '2025-09-30 22:12:49.499634', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:49.558279', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.17046456038951874, 'timestamp': '2025-09-30 22:12:49.565672', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.623166', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.1299184262752533, 'timestamp': '2025-09-30 22:12:49.627825', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.698301', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.13693813979625702, 'timestamp': '2025-09-30 22:12:49.704874', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:49.759840', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.21897254884243011, 'timestamp': '2025-09-30 22:12:49.763249', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-30 22:12:49.840281', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.4115963578224182, 'timestamp': '2025-09-30 22:12:49.843548', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:49.904234', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.14282214641571045, 'timestamp': '2025-09-30 22:12:49.907058', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:49.964344', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.2258899211883545, 'timestamp': '2025-09-30 22:12:49.972541', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:50.028438', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.30149027705192566, 'timestamp': '2025-09-30 22:12:50.035642', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:50.092153', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.1830621063709259, 'timestamp': '2025-09-30 22:12:50.096881', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:50.157869', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.1808684915304184, 'timestamp': '2025-09-30 22:12:50.160416', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.217053', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.1437249779701233, 'timestamp': '2025-09-30 22:12:50.223207', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.279208', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.22347842156887054, 'timestamp': '2025-09-30 22:12:50.283674', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:50.340750', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.1646786481142044, 'timestamp': '2025-09-30 22:12:50.344925', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:50.402109', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.27260541915893555, 'timestamp': '2025-09-30 22:12:50.405428', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.468685', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09875994175672531, 'timestamp': '2025-09-30 22:12:50.477065', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.545742', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.18479178845882416, 'timestamp': '2025-09-30 22:12:50.548256', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.619053', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.1301703155040741, 'timestamp': '2025-09-30 22:12:50.621786', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:50.686009', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.15938058495521545, 'timestamp': '2025-09-30 22:12:50.688669', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:50.750402', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.1307613104581833, 'timestamp': '2025-09-30 22:12:50.758276', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.824575', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.15206067264080048, 'timestamp': '2025-09-30 22:12:50.827127', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.889177', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.1593940556049347, 'timestamp': '2025-09-30 22:12:50.891743', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:50.957972', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.16636629402637482, 'timestamp': '2025-09-30 22:12:50.960794', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:51.021251', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.18565301597118378, 'timestamp': '2025-09-30 22:12:51.028677', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.097457', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.16877250373363495, 'timestamp': '2025-09-30 22:12:51.099866', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.157785', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.21783219277858734, 'timestamp': '2025-09-30 22:12:51.161302', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.220143', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.20498891174793243, 'timestamp': '2025-09-30 22:12:51.227290', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.288056', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.2555560767650604, 'timestamp': '2025-09-30 22:12:51.295342', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:51.369430', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.2092830091714859, 'timestamp': '2025-09-30 22:12:51.373012', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:51.449299', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.24223953485488892, 'timestamp': '2025-09-30 22:12:51.454424', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.516861', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.1859511286020279, 'timestamp': '2025-09-30 22:12:51.524173', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.586069', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.1530088186264038, 'timestamp': '2025-09-30 22:12:51.592973', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.651637', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.27524590492248535, 'timestamp': '2025-09-30 22:12:51.654721', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:51.714595', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.19483667612075806, 'timestamp': '2025-09-30 22:12:51.716778', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:51.775131', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.2506118714809418, 'timestamp': '2025-09-30 22:12:51.777874', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:51.835757', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.06467576324939728, 'timestamp': '2025-09-30 22:12:51.845402', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:51.903696', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.16342322528362274, 'timestamp': '2025-09-30 22:12:51.906017', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:51.964030', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.35954999923706055, 'timestamp': '2025-09-30 22:12:51.966324', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:52.033752', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.22995957732200623, 'timestamp': '2025-09-30 22:12:52.037630', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:52.097257', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.19286786019802094, 'timestamp': '2025-09-30 22:12:52.103464', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:52.162229', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.19460031390190125, 'timestamp': '2025-09-30 22:12:52.164444', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.221337', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.12359955906867981, 'timestamp': '2025-09-30 22:12:52.224014', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.292243', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.11731337010860443, 'timestamp': '2025-09-30 22:12:52.294695', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:52.352629', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.20059210062026978, 'timestamp': '2025-09-30 22:12:52.364890', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.421519', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.27967339754104614, 'timestamp': '2025-09-30 22:12:52.425143', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:52.482942', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.1697525829076767, 'timestamp': '2025-09-30 22:12:52.485263', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:52.543206', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.15213647484779358, 'timestamp': '2025-09-30 22:12:52.547159', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:52.607087', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.20444917678833008, 'timestamp': '2025-09-30 22:12:52.612974', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:52.673142', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.15931718051433563, 'timestamp': '2025-09-30 22:12:52.679984', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.739552', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.13865399360656738, 'timestamp': '2025-09-30 22:12:52.742254', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:52.800247', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.20454667508602142, 'timestamp': '2025-09-30 22:12:52.802915', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.861771', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.15809211134910583, 'timestamp': '2025-09-30 22:12:52.867497', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:52.923442', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.1935257762670517, 'timestamp': '2025-09-30 22:12:52.925969', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:52.986296', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.2304973602294922, 'timestamp': '2025-09-30 22:12:52.988816', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.050978', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.2615872919559479, 'timestamp': '2025-09-30 22:12:53.053793', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.111213', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.14336422085762024, 'timestamp': '2025-09-30 22:12:53.117085', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.172813', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.14841242134571075, 'timestamp': '2025-09-30 22:12:53.176941', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:53.232941', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.1878548562526703, 'timestamp': '2025-09-30 22:12:53.235513', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.293163', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.21695812046527863, 'timestamp': '2025-09-30 22:12:53.296320', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.354547', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.18879753351211548, 'timestamp': '2025-09-30 22:12:53.360978', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.417937', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.1597956418991089, 'timestamp': '2025-09-30 22:12:53.422167', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:53.480457', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.15038464963436127, 'timestamp': '2025-09-30 22:12:53.482609', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:53.539968', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.21794354915618896, 'timestamp': '2025-09-30 22:12:53.542498', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.599653', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.2144315391778946, 'timestamp': '2025-09-30 22:12:53.606994', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.664091', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.1541643589735031, 'timestamp': '2025-09-30 22:12:53.667220', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.725892', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.17625848948955536, 'timestamp': '2025-09-30 22:12:53.729326', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.785202', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.18323668837547302, 'timestamp': '2025-09-30 22:12:53.787728', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:53.843374', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.16955378651618958, 'timestamp': '2025-09-30 22:12:53.855681', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.910920', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.27064958214759827, 'timestamp': '2025-09-30 22:12:53.913083', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:53.970118', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.20578284561634064, 'timestamp': '2025-09-30 22:12:53.973054', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:54.029562', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.26044824719429016, 'timestamp': '2025-09-30 22:12:54.032985', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:54.091307', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.20028498768806458, 'timestamp': '2025-09-30 22:12:54.098435', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:54.155435', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.17474353313446045, 'timestamp': '2025-09-30 22:12:54.158116', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:54.215109', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.32686370611190796, 'timestamp': '2025-09-30 22:12:54.222118', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:54.280234', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.17041343450546265, 'timestamp': '2025-09-30 22:12:54.282684', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:54.338416', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.25959038734436035, 'timestamp': '2025-09-30 22:12:54.344667', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:54.401839', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.15521271526813507, 'timestamp': '2025-09-30 22:12:54.404300', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:54.466369', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.09895973652601242, 'timestamp': '2025-09-30 22:12:54.470234', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:54.529822', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.1561821550130844, 'timestamp': '2025-09-30 22:12:54.537106', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:54.593749', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.15391592681407928, 'timestamp': '2025-09-30 22:12:54.608020', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:54.665410', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.20746102929115295, 'timestamp': '2025-09-30 22:12:54.668362', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:54.728248', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.1805369257926941, 'timestamp': '2025-09-30 22:12:54.731292', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:54.787935', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.21540305018424988, 'timestamp': '2025-09-30 22:12:54.791087', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:54.850504', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.2296939492225647, 'timestamp': '2025-09-30 22:12:54.859949', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:54.921683', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.16635280847549438, 'timestamp': '2025-09-30 22:12:54.938980', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:55.001155', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.14240975677967072, 'timestamp': '2025-09-30 22:12:55.007276', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:55.070268', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.18647447228431702, 'timestamp': '2025-09-30 22:12:55.083825', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:55.151965', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.10365337878465652, 'timestamp': '2025-09-30 22:12:55.179070', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:55.244929', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.19621475040912628, 'timestamp': '2025-09-30 22:12:55.260554', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:12:55.321597', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.11996288597583771, 'timestamp': '2025-09-30 22:12:55.326378', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:55.383958', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.11954904347658157, 'timestamp': '2025-09-30 22:12:55.396036', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:55.456734', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.14303842186927795, 'timestamp': '2025-09-30 22:12:55.469640', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:55.547878', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.2967679798603058, 'timestamp': '2025-09-30 22:12:55.554469', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:55.612030', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.09639512747526169, 'timestamp': '2025-09-30 22:12:55.619680', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:55.682839', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.1774543821811676, 'timestamp': '2025-09-30 22:12:55.688462', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:55.751625', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.12546226382255554, 'timestamp': '2025-09-30 22:12:55.757989', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:55.817812', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.14030514657497406, 'timestamp': '2025-09-30 22:12:55.837438', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:55.897185', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.124055415391922, 'timestamp': '2025-09-30 22:12:55.907645', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:55.982290', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.1633138805627823, 'timestamp': '2025-09-30 22:12:55.992837', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:56.052632', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.25859540700912476, 'timestamp': '2025-09-30 22:12:56.067950', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:56.126907', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.18594497442245483, 'timestamp': '2025-09-30 22:12:56.160738', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:56.219369', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.25945207476615906, 'timestamp': '2025-09-30 22:12:56.222263', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:56.281108', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.1932472288608551, 'timestamp': '2025-09-30 22:12:56.284384', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:56.345859', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.17496240139007568, 'timestamp': '2025-09-30 22:12:56.352917', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:56.412571', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.09969083964824677, 'timestamp': '2025-09-30 22:12:56.420022', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:56.482627', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.11798202991485596, 'timestamp': '2025-09-30 22:12:56.485816', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:56.549596', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.1538659930229187, 'timestamp': '2025-09-30 22:12:56.554635', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:56.612012', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.23483611643314362, 'timestamp': '2025-09-30 22:12:56.619046', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:56.675290', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.1938251405954361, 'timestamp': '2025-09-30 22:12:56.678424', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:56.734527', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.21769163012504578, 'timestamp': '2025-09-30 22:12:56.737199', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:56.795030', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.14667335152626038, 'timestamp': '2025-09-30 22:12:56.797394', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:56.854965', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.12308643758296967, 'timestamp': '2025-09-30 22:12:56.861485', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:56.919751', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.2783065438270569, 'timestamp': '2025-09-30 22:12:56.927207', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:56.984425', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.20141780376434326, 'timestamp': '2025-09-30 22:12:56.990915', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:57.053193', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.1204909086227417, 'timestamp': '2025-09-30 22:12:57.057080', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.117126', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.22009168565273285, 'timestamp': '2025-09-30 22:12:57.124173', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.191625', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.21417921781539917, 'timestamp': '2025-09-30 22:12:57.194754', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:57.262559', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.18252070248126984, 'timestamp': '2025-09-30 22:12:57.272276', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.328965', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.13366100192070007, 'timestamp': '2025-09-30 22:12:57.334633', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:57.390857', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.1560625582933426, 'timestamp': '2025-09-30 22:12:57.397267', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.454348', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.22337505221366882, 'timestamp': '2025-09-30 22:12:57.457236', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.513979', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.14412200450897217, 'timestamp': '2025-09-30 22:12:57.518500', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.583384', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.305237740278244, 'timestamp': '2025-09-30 22:12:57.586946', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:57.649303', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.13409027457237244, 'timestamp': '2025-09-30 22:12:57.655695', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:57.712095', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.20665158331394196, 'timestamp': '2025-09-30 22:12:57.715652', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.773910', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.28777849674224854, 'timestamp': '2025-09-30 22:12:57.785153', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:57.843293', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.23916809260845184, 'timestamp': '2025-09-30 22:12:57.848235', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:57.906433', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.2051977962255478, 'timestamp': '2025-09-30 22:12:57.914751', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:57.974043', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.21345962584018707, 'timestamp': '2025-09-30 22:12:57.979727', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:58.039721', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.11088985949754715, 'timestamp': '2025-09-30 22:12:58.043281', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:58.100816', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.1299421638250351, 'timestamp': '2025-09-30 22:12:58.105084', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:58.161704', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.2111188769340515, 'timestamp': '2025-09-30 22:12:58.168714', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:58.225370', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.08961749076843262, 'timestamp': '2025-09-30 22:12:58.229635', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:58.285857', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.1445196121931076, 'timestamp': '2025-09-30 22:12:58.288746', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:58.345816', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.2047400325536728, 'timestamp': '2025-09-30 22:12:58.350736', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:58.407041', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.09127342700958252, 'timestamp': '2025-09-30 22:12:58.412821', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:58.468232', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.21266692876815796, 'timestamp': '2025-09-30 22:12:58.470633', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:58.525801', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.19048388302326202, 'timestamp': '2025-09-30 22:12:58.529336', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:58.585185', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.21682153642177582, 'timestamp': '2025-09-30 22:12:58.594795', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:58.650503', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.1252809762954712, 'timestamp': '2025-09-30 22:12:58.661053', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:58.717559', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.07544396817684174, 'timestamp': '2025-09-30 22:12:58.720245', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:58.782348', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.22966037690639496, 'timestamp': '2025-09-30 22:12:58.785158', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:58.844561', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.1495196521282196, 'timestamp': '2025-09-30 22:12:58.849851', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:58.907587', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.25456368923187256, 'timestamp': '2025-09-30 22:12:58.915200', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:12:58.971519', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.1633371263742447, 'timestamp': '2025-09-30 22:12:58.973980', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.033347', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.2194211333990097, 'timestamp': '2025-09-30 22:12:59.036224', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:59.094573', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.1455714851617813, 'timestamp': '2025-09-30 22:12:59.101776', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.159626', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.15422876179218292, 'timestamp': '2025-09-30 22:12:59.167689', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:59.226497', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.1745293289422989, 'timestamp': '2025-09-30 22:12:59.228762', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:59.284049', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.14796428382396698, 'timestamp': '2025-09-30 22:12:59.288977', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.345390', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.22324874997138977, 'timestamp': '2025-09-30 22:12:59.349777', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.409076', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.16410787403583527, 'timestamp': '2025-09-30 22:12:59.415421', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:59.472425', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.22194768488407135, 'timestamp': '2025-09-30 22:12:59.479424', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:59.536633', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.1817702203989029, 'timestamp': '2025-09-30 22:12:59.539221', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:12:59.597058', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.16227076947689056, 'timestamp': '2025-09-30 22:12:59.599927', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:12:59.656521', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.1656961739063263, 'timestamp': '2025-09-30 22:12:59.662774', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.720431', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.2025950849056244, 'timestamp': '2025-09-30 22:12:59.724652', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.781886', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.1360328048467636, 'timestamp': '2025-09-30 22:12:59.788124', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:12:59.846529', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.1858198046684265, 'timestamp': '2025-09-30 22:12:59.849168', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:12:59.909539', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.07900147140026093, 'timestamp': '2025-09-30 22:12:59.917758', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:12:59.974692', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.23101678490638733, 'timestamp': '2025-09-30 22:12:59.977424', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:00.043468', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.1718234121799469, 'timestamp': '2025-09-30 22:13:00.046190', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:00.105836', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.1257755011320114, 'timestamp': '2025-09-30 22:13:00.108967', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:00.169239', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.25253090262413025, 'timestamp': '2025-09-30 22:13:00.175194', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:00.241190', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.20371709764003754, 'timestamp': '2025-09-30 22:13:00.245090', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:00.304258', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.2189284712076187, 'timestamp': '2025-09-30 22:13:00.309756', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:13:13.896943', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 9749.561864849176, 'timestamp': '2025-09-30 22:13:13.912832', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:13.972601', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.24689339101314545, 'timestamp': '2025-09-30 22:13:13.975137', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:14.033283', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.1487521529197693, 'timestamp': '2025-09-30 22:13:14.040469', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:14.119989', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.20964288711547852, 'timestamp': '2025-09-30 22:13:14.124448', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:14.184515', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.19929291307926178, 'timestamp': '2025-09-30 22:13:14.187636', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:14.245789', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.18475326895713806, 'timestamp': '2025-09-30 22:13:14.248388', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:14.310678', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.1895679235458374, 'timestamp': '2025-09-30 22:13:14.317688', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:14.373759', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.16697365045547485, 'timestamp': '2025-09-30 22:13:14.378899', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:14.435678', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.15936008095741272, 'timestamp': '2025-09-30 22:13:14.441044', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:14.500406', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.14189955592155457, 'timestamp': '2025-09-30 22:13:14.503197', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:14.562240', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.27224525809288025, 'timestamp': '2025-09-30 22:13:14.570429', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:14.629735', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.15717366337776184, 'timestamp': '2025-09-30 22:13:14.633529', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:14.692389', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.18231400847434998, 'timestamp': '2025-09-30 22:13:14.695735', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:14.752861', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.1816205084323883, 'timestamp': '2025-09-30 22:13:14.757139', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:14.815574', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.11926618218421936, 'timestamp': '2025-09-30 22:13:14.823721', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:14.879846', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.22868457436561584, 'timestamp': '2025-09-30 22:13:14.884905', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:14.941414', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.19945599138736725, 'timestamp': '2025-09-30 22:13:14.946156', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:15.005392', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15698915719985962, 'timestamp': '2025-09-30 22:13:15.011492', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:15.080169', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.10871953517198563, 'timestamp': '2025-09-30 22:13:15.087148', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:15.144902', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.2747505307197571, 'timestamp': '2025-09-30 22:13:15.147405', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:15.211535', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.19196397066116333, 'timestamp': '2025-09-30 22:13:15.214421', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:15.276317', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.23325352370738983, 'timestamp': '2025-09-30 22:13:15.279604', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:15.340731', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.2690019905567169, 'timestamp': '2025-09-30 22:13:15.355172', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:15.420749', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15240077674388885, 'timestamp': '2025-09-30 22:13:15.423399', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:15.479741', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.18551325798034668, 'timestamp': '2025-09-30 22:13:15.482847', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:15.539248', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.1370181441307068, 'timestamp': '2025-09-30 22:13:15.542769', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:15.601103', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.22167736291885376, 'timestamp': '2025-09-30 22:13:15.609218', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:15.667432', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.15233993530273438, 'timestamp': '2025-09-30 22:13:15.675266', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:15.732560', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.2748323380947113, 'timestamp': '2025-09-30 22:13:15.735919', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:15.792644', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.19144240021705627, 'timestamp': '2025-09-30 22:13:15.796328', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:15.853222', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.17435020208358765, 'timestamp': '2025-09-30 22:13:15.859785', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:15.915321', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.16585741937160492, 'timestamp': '2025-09-30 22:13:15.918075', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:15.978524', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.1938267946243286, 'timestamp': '2025-09-30 22:13:15.982059', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.047386', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.2623971700668335, 'timestamp': '2025-09-30 22:13:16.051559', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:16.110058', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.21536046266555786, 'timestamp': '2025-09-30 22:13:16.117088', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:16.180423', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.14660008251667023, 'timestamp': '2025-09-30 22:13:16.184044', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:16.241130', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.29109230637550354, 'timestamp': '2025-09-30 22:13:16.244230', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.301028', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.1281578689813614, 'timestamp': '2025-09-30 22:13:16.305831', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.364613', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.15399222075939178, 'timestamp': '2025-09-30 22:13:16.371942', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:16.432191', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.22763998806476593, 'timestamp': '2025-09-30 22:13:16.434554', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:16.494414', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.1709333211183548, 'timestamp': '2025-09-30 22:13:16.498294', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:16.554077', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.20926254987716675, 'timestamp': '2025-09-30 22:13:16.556865', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:16.626357', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.2126988023519516, 'timestamp': '2025-09-30 22:13:16.632482', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.688282', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.1479242742061615, 'timestamp': '2025-09-30 22:13:16.690783', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:16.753098', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.24066784977912903, 'timestamp': '2025-09-30 22:13:16.755725', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.812085', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.1852642446756363, 'timestamp': '2025-09-30 22:13:16.815329', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:16.876244', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.2644384503364563, 'timestamp': '2025-09-30 22:13:16.882991', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:16.946137', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.3315332233905792, 'timestamp': '2025-09-30 22:13:16.948644', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:17.006090', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.12459708005189896, 'timestamp': '2025-09-30 22:13:17.011390', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:17.067757', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.17581723630428314, 'timestamp': '2025-09-30 22:13:17.070294', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.126437', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.2517448663711548, 'timestamp': '2025-09-30 22:13:17.138698', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.193842', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.16250838339328766, 'timestamp': '2025-09-30 22:13:17.198232', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:17.256433', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.207330584526062, 'timestamp': '2025-09-30 22:13:17.259795', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.318328', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.24530568718910217, 'timestamp': '2025-09-30 22:13:17.320918', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.378706', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.1722606122493744, 'timestamp': '2025-09-30 22:13:17.385458', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.441964', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.15263910591602325, 'timestamp': '2025-09-30 22:13:17.445515', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:17.502225', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.09743211418390274, 'timestamp': '2025-09-30 22:13:17.504875', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.561554', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.2549894452095032, 'timestamp': '2025-09-30 22:13:17.564301', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:17.621202', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.11753284186124802, 'timestamp': '2025-09-30 22:13:17.627225', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:17.683533', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.1744733303785324, 'timestamp': '2025-09-30 22:13:17.685794', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:17.741777', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.14445409178733826, 'timestamp': '2025-09-30 22:13:17.744778', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:17.800734', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.17412732541561127, 'timestamp': '2025-09-30 22:13:17.804879', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.861842', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.13423380255699158, 'timestamp': '2025-09-30 22:13:17.868185', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:17.926461', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.22083780169487, 'timestamp': '2025-09-30 22:13:17.929347', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:17.987878', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.1973988562822342, 'timestamp': '2025-09-30 22:13:17.991634', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.048305', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.27877092361450195, 'timestamp': '2025-09-30 22:13:18.053117', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:18.110888', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.24586547911167145, 'timestamp': '2025-09-30 22:13:18.116864', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:18.172026', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.2228320986032486, 'timestamp': '2025-09-30 22:13:18.175406', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:18.231864', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.16799654066562653, 'timestamp': '2025-09-30 22:13:18.234104', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.290236', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.13806885480880737, 'timestamp': '2025-09-30 22:13:18.293268', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:18.350239', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.2504417598247528, 'timestamp': '2025-09-30 22:13:18.356139', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.412779', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.30002105236053467, 'timestamp': '2025-09-30 22:13:18.415904', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.473037', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24092507362365723, 'timestamp': '2025-09-30 22:13:18.475559', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:18.532125', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.1479676216840744, 'timestamp': '2025-09-30 22:13:18.534514', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:18.591329', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.13936872780323029, 'timestamp': '2025-09-30 22:13:18.597262', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.654791', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.1464085876941681, 'timestamp': '2025-09-30 22:13:18.663814', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:18.720625', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.20140767097473145, 'timestamp': '2025-09-30 22:13:18.723298', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:18.779422', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.14010626077651978, 'timestamp': '2025-09-30 22:13:18.783393', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:18.841083', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.17002686858177185, 'timestamp': '2025-09-30 22:13:18.847891', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:18.905066', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.24259592592716217, 'timestamp': '2025-09-30 22:13:18.908267', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:18.964614', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.07928382605314255, 'timestamp': '2025-09-30 22:13:18.970566', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:19.027055', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.24631069600582123, 'timestamp': '2025-09-30 22:13:19.029193', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.087231', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.1957385390996933, 'timestamp': '2025-09-30 22:13:19.093764', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.149994', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.11540287733078003, 'timestamp': '2025-09-30 22:13:19.156058', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.216275', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.19177579879760742, 'timestamp': '2025-09-30 22:13:19.218503', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.273461', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.18964047729969025, 'timestamp': '2025-09-30 22:13:19.275815', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.331536', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.14671941101551056, 'timestamp': '2025-09-30 22:13:19.338656', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.394229', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.25480368733406067, 'timestamp': '2025-09-30 22:13:19.404173', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.460587', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.19676513969898224, 'timestamp': '2025-09-30 22:13:19.466149', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.522312', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.13940678536891937, 'timestamp': '2025-09-30 22:13:19.526533', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.587504', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.1716499626636505, 'timestamp': '2025-09-30 22:13:19.597723', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:19.653979', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.2013036161661148, 'timestamp': '2025-09-30 22:13:19.657610', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.717203', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.21723923087120056, 'timestamp': '2025-09-30 22:13:19.720589', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:19.776518', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.18032416701316833, 'timestamp': '2025-09-30 22:13:19.779179', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:19.835846', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18369153141975403, 'timestamp': '2025-09-30 22:13:19.842308', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:19.900002', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.251202255487442, 'timestamp': '2025-09-30 22:13:19.903570', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:19.961080', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.1577962040901184, 'timestamp': '2025-09-30 22:13:19.963783', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:20.020152', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.21158874034881592, 'timestamp': '2025-09-30 22:13:20.022959', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:20.079958', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.3238627314567566, 'timestamp': '2025-09-30 22:13:20.086444', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:20.143520', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.17999853193759918, 'timestamp': '2025-09-30 22:13:20.146734', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:20.202878', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.14147445559501648, 'timestamp': '2025-09-30 22:13:20.206053', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:20.262101', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.11517177522182465, 'timestamp': '2025-09-30 22:13:20.274431', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:20.331828', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.20237389206886292, 'timestamp': '2025-09-30 22:13:20.339385', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-30 22:13:20.742267', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:20.802187', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.21765632927417755, 'timestamp': '2025-09-30 22:13:20.805878', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:20.867521', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.30007925629615784, 'timestamp': '2025-09-30 22:13:20.870092', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:20.926098', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.1890747845172882, 'timestamp': '2025-09-30 22:13:20.929285', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:20.986121', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.14304755628108978, 'timestamp': '2025-09-30 22:13:20.992369', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.047227', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.18767870962619781, 'timestamp': '2025-09-30 22:13:21.049468', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:21.107372', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.18669404089450836, 'timestamp': '2025-09-30 22:13:21.110124', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.167149', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.18169236183166504, 'timestamp': '2025-09-30 22:13:21.172464', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:21.229178', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.17111104726791382, 'timestamp': '2025-09-30 22:13:21.235416', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:21.290798', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.1716088503599167, 'timestamp': '2025-09-30 22:13:21.293649', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:21.348827', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.2050832360982895, 'timestamp': '2025-09-30 22:13:21.352182', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:13:21.408875', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.12630854547023773, 'timestamp': '2025-09-30 22:13:21.412567', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.468897', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.16735798120498657, 'timestamp': '2025-09-30 22:13:21.475802', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.531640', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.18074439465999603, 'timestamp': '2025-09-30 22:13:21.535003', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.596495', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.37173202633857727, 'timestamp': '2025-09-30 22:13:21.598972', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:21.654696', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.2454870641231537, 'timestamp': '2025-09-30 22:13:21.657360', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.713888', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.1963270604610443, 'timestamp': '2025-09-30 22:13:21.723905', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.779610', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.14868026971817017, 'timestamp': '2025-09-30 22:13:21.782615', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.840786', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.20298516750335693, 'timestamp': '2025-09-30 22:13:21.843998', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:21.901921', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.3418915867805481, 'timestamp': '2025-09-30 22:13:21.904667', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:21.962105', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.1058596819639206, 'timestamp': '2025-09-30 22:13:21.968876', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:22.027825', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.06827276945114136, 'timestamp': '2025-09-30 22:13:22.030995', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:22.092561', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.1952391266822815, 'timestamp': '2025-09-30 22:13:22.095904', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:22.155978', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.1899966448545456, 'timestamp': '2025-09-30 22:13:22.159796', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:22.220326', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.1845930516719818, 'timestamp': '2025-09-30 22:13:22.227886', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:22.286782', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.2131594866514206, 'timestamp': '2025-09-30 22:13:22.291347', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:22.349432', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.20718492567539215, 'timestamp': '2025-09-30 22:13:22.354809', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:22.412841', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.24356381595134735, 'timestamp': '2025-09-30 22:13:22.417187', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:22.478024', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.12373742461204529, 'timestamp': '2025-09-30 22:13:22.486138', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:22.542589', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.2038886398077011, 'timestamp': '2025-09-30 22:13:22.545078', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:22.601971', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.15690019726753235, 'timestamp': '2025-09-30 22:13:22.605667', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:22.664241', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.1372220516204834, 'timestamp': '2025-09-30 22:13:22.667230', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:22.730195', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.2271338552236557, 'timestamp': '2025-09-30 22:13:22.737852', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:22.794415', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.1586388498544693, 'timestamp': '2025-09-30 22:13:22.798020', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:22.855297', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.17735719680786133, 'timestamp': '2025-09-30 22:13:22.858265', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:22.914243', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.2562357485294342, 'timestamp': '2025-09-30 22:13:22.920569', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:22.979242', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.11177466809749603, 'timestamp': '2025-09-30 22:13:22.985800', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:23.041678', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.13793088495731354, 'timestamp': '2025-09-30 22:13:23.045933', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:23.101908', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.27784839272499084, 'timestamp': '2025-09-30 22:13:23.104299', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.162461', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.21507695317268372, 'timestamp': '2025-09-30 22:13:23.165185', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:23.220843', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.19236022233963013, 'timestamp': '2025-09-30 22:13:23.228437', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.296326', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.2480131834745407, 'timestamp': '2025-09-30 22:13:23.300635', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:23.358446', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.22117944061756134, 'timestamp': '2025-09-30 22:13:23.362186', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.421100', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.10905706882476807, 'timestamp': '2025-09-30 22:13:23.424163', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.483837', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.1174277663230896, 'timestamp': '2025-09-30 22:13:23.490022', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.547020', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.33061251044273376, 'timestamp': '2025-09-30 22:13:23.550880', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.608703', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.15039479732513428, 'timestamp': '2025-09-30 22:13:23.611795', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:23.668538', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.10840286314487457, 'timestamp': '2025-09-30 22:13:23.671035', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:23.729297', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.22073450684547424, 'timestamp': '2025-09-30 22:13:23.737811', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:23.795105', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.18840795755386353, 'timestamp': '2025-09-30 22:13:23.798034', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:23.853764', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.1232696995139122, 'timestamp': '2025-09-30 22:13:23.856424', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:23.912834', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.13954366743564606, 'timestamp': '2025-09-30 22:13:23.915478', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:23.971413', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.10610561817884445, 'timestamp': '2025-09-30 22:13:23.977807', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.035040', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.1686462163925171, 'timestamp': '2025-09-30 22:13:24.039157', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:24.095941', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.1746828854084015, 'timestamp': '2025-09-30 22:13:24.100291', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.162955', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.29261893033981323, 'timestamp': '2025-09-30 22:13:24.165872', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:24.224603', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.17280937731266022, 'timestamp': '2025-09-30 22:13:24.231596', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.288087', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.19245067238807678, 'timestamp': '2025-09-30 22:13:24.291829', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.351219', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.14623627066612244, 'timestamp': '2025-09-30 22:13:24.353731', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:24.413876', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.1859598457813263, 'timestamp': '2025-09-30 22:13:24.417264', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.475425', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.21537554264068604, 'timestamp': '2025-09-30 22:13:24.487222', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.545223', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.22602231800556183, 'timestamp': '2025-09-30 22:13:24.547952', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:24.604468', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.17593951523303986, 'timestamp': '2025-09-30 22:13:24.607162', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:24.669028', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.21585562825202942, 'timestamp': '2025-09-30 22:13:24.671945', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.729734', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.17400424182415009, 'timestamp': '2025-09-30 22:13:24.736453', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:24.792262', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.16866452991962433, 'timestamp': '2025-09-30 22:13:24.795665', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:24.852165', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.25665825605392456, 'timestamp': '2025-09-30 22:13:24.855268', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:24.914417', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10906940698623657, 'timestamp': '2025-09-30 22:13:24.917890', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:24.982847', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.2896806001663208, 'timestamp': '2025-09-30 22:13:24.990645', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:25.047105', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14539287984371185, 'timestamp': '2025-09-30 22:13:25.051705', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:25.107514', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.1850084513425827, 'timestamp': '2025-09-30 22:13:25.111856', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:25.167435', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.1369488537311554, 'timestamp': '2025-09-30 22:13:25.177135', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:25.234663', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.32081055641174316, 'timestamp': '2025-09-30 22:13:25.242045', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:25.297663', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.15152718126773834, 'timestamp': '2025-09-30 22:13:25.301500', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:13:25.381187', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.09734057635068893, 'timestamp': '2025-09-30 22:13:25.389157', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:25.450056', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.18475472927093506, 'timestamp': '2025-09-30 22:13:25.457769', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:25.523176', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.2163972109556198, 'timestamp': '2025-09-30 22:13:25.535998', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:25.596605', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.1964149922132492, 'timestamp': '2025-09-30 22:13:25.600021', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:25.659257', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.12589992582798004, 'timestamp': '2025-09-30 22:13:25.669814', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:25.728848', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1841193437576294, 'timestamp': '2025-09-30 22:13:25.732978', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:25.792138', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.1992194652557373, 'timestamp': '2025-09-30 22:13:25.798717', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:25.854452', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.15784618258476257, 'timestamp': '2025-09-30 22:13:25.860598', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:25.928838', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20562852919101715, 'timestamp': '2025-09-30 22:13:25.935441', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:25.994749', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.22124233841896057, 'timestamp': '2025-09-30 22:13:26.001805', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:26.065549', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.18683214485645294, 'timestamp': '2025-09-30 22:13:26.071577', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.129434', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.189304381608963, 'timestamp': '2025-09-30 22:13:26.134264', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.199092', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.18410977721214294, 'timestamp': '2025-09-30 22:13:26.202237', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:26.262736', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.27058103680610657, 'timestamp': '2025-09-30 22:13:26.265735', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:26.330569', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16856351494789124, 'timestamp': '2025-09-30 22:13:26.339529', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:26.398467', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.14250145852565765, 'timestamp': '2025-09-30 22:13:26.404107', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:26.474055', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.20958615839481354, 'timestamp': '2025-09-30 22:13:26.477019', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:26.535607', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.19768483936786652, 'timestamp': '2025-09-30 22:13:26.543476', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:26.619235', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.2380989044904709, 'timestamp': '2025-09-30 22:13:26.627187', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.686623', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15868550539016724, 'timestamp': '2025-09-30 22:13:26.690522', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.750408', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.30951881408691406, 'timestamp': '2025-09-30 22:13:26.753732', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:26.810100', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.13641373813152313, 'timestamp': '2025-09-30 22:13:26.812856', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:26.870841', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.22466452419757843, 'timestamp': '2025-09-30 22:13:26.878167', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.936409', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.1965935081243515, 'timestamp': '2025-09-30 22:13:26.939545', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:26.996614', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.15781985223293304, 'timestamp': '2025-09-30 22:13:27.003635', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:27.059997', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.198882058262825, 'timestamp': '2025-09-30 22:13:27.062637', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:27.120041', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.20001088082790375, 'timestamp': '2025-09-30 22:13:27.126479', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.182534', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.16072915494441986, 'timestamp': '2025-09-30 22:13:27.185086', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.243197', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.16711464524269104, 'timestamp': '2025-09-30 22:13:27.246609', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:13:27.304989', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.14865458011627197, 'timestamp': '2025-09-30 22:13:27.308168', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.365542', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.16033484041690826, 'timestamp': '2025-09-30 22:13:27.372081', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:27.427648', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.14178885519504547, 'timestamp': '2025-09-30 22:13:27.430861', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:27.487680', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.2174963802099228, 'timestamp': '2025-09-30 22:13:27.492380', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:27.550240', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.08742126822471619, 'timestamp': '2025-09-30 22:13:27.552885', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.611228', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.180347740650177, 'timestamp': '2025-09-30 22:13:27.619849', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.676583', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.1882791519165039, 'timestamp': '2025-09-30 22:13:27.679910', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:27.739028', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.2016359269618988, 'timestamp': '2025-09-30 22:13:27.743731', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:27.804753', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.18971486389636993, 'timestamp': '2025-09-30 22:13:27.808236', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.867947', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.21102705597877502, 'timestamp': '2025-09-30 22:13:27.874700', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:27.931368', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.14841924607753754, 'timestamp': '2025-09-30 22:13:27.934043', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:27.991671', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.16633842885494232, 'timestamp': '2025-09-30 22:13:27.995140', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.051956', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.31365668773651123, 'timestamp': '2025-09-30 22:13:28.054321', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.110237', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.15089008212089539, 'timestamp': '2025-09-30 22:13:28.120122', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.177426', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.21378591656684875, 'timestamp': '2025-09-30 22:13:28.181604', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.254634', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.1872638314962387, 'timestamp': '2025-09-30 22:13:28.257531', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:28.315517', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.16692449152469635, 'timestamp': '2025-09-30 22:13:28.318471', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:28.374343', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.1442839354276657, 'timestamp': '2025-09-30 22:13:28.381866', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:28.438549', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.23985877633094788, 'timestamp': '2025-09-30 22:13:28.441112', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:28.498002', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.19413632154464722, 'timestamp': '2025-09-30 22:13:28.500965', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:28.556847', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.19171442091464996, 'timestamp': '2025-09-30 22:13:28.560174', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.617852', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.35690295696258545, 'timestamp': '2025-09-30 22:13:28.623708', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.679673', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.16759690642356873, 'timestamp': '2025-09-30 22:13:28.681874', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.737239', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.09899739921092987, 'timestamp': '2025-09-30 22:13:28.739624', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:28.795469', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.1338987499475479, 'timestamp': '2025-09-30 22:13:28.798891', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.858542', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.12931735813617706, 'timestamp': '2025-09-30 22:13:28.865059', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:28.922343', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.16080236434936523, 'timestamp': '2025-09-30 22:13:28.927342', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:28.984281', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.1669505387544632, 'timestamp': '2025-09-30 22:13:28.986765', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.044523', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.12387700378894806, 'timestamp': '2025-09-30 22:13:29.047186', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:29.104845', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.18699440360069275, 'timestamp': '2025-09-30 22:13:29.110933', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:29.166543', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.17388811707496643, 'timestamp': '2025-09-30 22:13:29.169093', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:29.228831', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.1562439650297165, 'timestamp': '2025-09-30 22:13:29.231904', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:29.287736', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.18339303135871887, 'timestamp': '2025-09-30 22:13:29.290268', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.347133', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.1728038638830185, 'timestamp': '2025-09-30 22:13:29.353158', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.408242', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.2466098517179489, 'timestamp': '2025-09-30 22:13:29.412062', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.467927', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.2968706488609314, 'timestamp': '2025-09-30 22:13:29.470467', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:29.527583', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.2482396811246872, 'timestamp': '2025-09-30 22:13:29.530115', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:29.586533', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.22109684348106384, 'timestamp': '2025-09-30 22:13:29.592418', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:29.649607', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.1957387924194336, 'timestamp': '2025-09-30 22:13:29.652015', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:29.707480', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.1696462631225586, 'timestamp': '2025-09-30 22:13:29.710243', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.771666', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.1674940139055252, 'timestamp': '2025-09-30 22:13:29.774012', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:29.830571', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.2587366998195648, 'timestamp': '2025-09-30 22:13:29.836429', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:29.894618', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.11210386455059052, 'timestamp': '2025-09-30 22:13:29.898800', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:29.954941', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.11151353269815445, 'timestamp': '2025-09-30 22:13:29.957752', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.016015', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.14163097739219666, 'timestamp': '2025-09-30 22:13:30.018561', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:30.075199', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.16430304944515228, 'timestamp': '2025-09-30 22:13:30.081076', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.136471', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.17104381322860718, 'timestamp': '2025-09-30 22:13:30.138990', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.195734', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.19964008033275604, 'timestamp': '2025-09-30 22:13:30.199325', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.256065', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.18306583166122437, 'timestamp': '2025-09-30 22:13:30.259572', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.316146', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.18967285752296448, 'timestamp': '2025-09-30 22:13:30.325183', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:30.383099', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.18204742670059204, 'timestamp': '2025-09-30 22:13:30.385631', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.451169', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.1272979974746704, 'timestamp': '2025-09-30 22:13:30.453460', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.509196', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.22888515889644623, 'timestamp': '2025-09-30 22:13:30.513389', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:30.569400', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.31793516874313354, 'timestamp': '2025-09-30 22:13:30.580563', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:30.642036', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.1662132889032364, 'timestamp': '2025-09-30 22:13:30.646299', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:30.706123', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.12415353953838348, 'timestamp': '2025-09-30 22:13:30.708397', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.766985', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.1557772159576416, 'timestamp': '2025-09-30 22:13:30.770765', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:30.826603', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.15416835248470306, 'timestamp': '2025-09-30 22:13:30.834530', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:30.891955', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.16601572930812836, 'timestamp': '2025-09-30 22:13:30.898035', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:30.966700', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.19303399324417114, 'timestamp': '2025-09-30 22:13:30.969430', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:31.031258', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.13646133244037628, 'timestamp': '2025-09-30 22:13:31.033915', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:31.092201', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.12824347615242004, 'timestamp': '2025-09-30 22:13:31.100391', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:31.157782', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.14722102880477905, 'timestamp': '2025-09-30 22:13:31.160303', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:31.216893', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.14658640325069427, 'timestamp': '2025-09-30 22:13:31.219275', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:31.280537', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.19804634153842926, 'timestamp': '2025-09-30 22:13:31.284700', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:31.341633', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.2131192982196808, 'timestamp': '2025-09-30 22:13:31.349104', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:31.409579', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.11557232588529587, 'timestamp': '2025-09-30 22:13:31.412176', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:31.467986', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.2240820825099945, 'timestamp': '2025-09-30 22:13:31.472817', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:31.529604', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.13597995042800903, 'timestamp': '2025-09-30 22:13:31.534748', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:31.590866', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.20281636714935303, 'timestamp': '2025-09-30 22:13:31.597657', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:31.655137', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.18318678438663483, 'timestamp': '2025-09-30 22:13:31.660812', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:31.718929', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.1477551907300949, 'timestamp': '2025-09-30 22:13:31.722177', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:31.794278', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.2195388227701187, 'timestamp': '2025-09-30 22:13:31.797939', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:31.861876', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.18393680453300476, 'timestamp': '2025-09-30 22:13:31.874292', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:31.933080', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.12291110306978226, 'timestamp': '2025-09-30 22:13:31.936506', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:31.993945', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.14986200630664825, 'timestamp': '2025-09-30 22:13:31.996905', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:13:32.059634', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.15316301584243774, 'timestamp': '2025-09-30 22:13:32.062907', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:32.119206', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.19341474771499634, 'timestamp': '2025-09-30 22:13:32.126140', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:32.185421', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.1291682869195938, 'timestamp': '2025-09-30 22:13:32.188576', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:32.245723', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.14286953210830688, 'timestamp': '2025-09-30 22:13:32.254748', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:32.311682', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.14845415949821472, 'timestamp': '2025-09-30 22:13:32.315851', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:32.373120', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.18786677718162537, 'timestamp': '2025-09-30 22:13:32.380753', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:32.436068', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.30836644768714905, 'timestamp': '2025-09-30 22:13:32.439737', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:32.495732', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.22166289389133453, 'timestamp': '2025-09-30 22:13:32.500903', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:32.560215', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.25891613960266113, 'timestamp': '2025-09-30 22:13:32.563077', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:32.620027', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.11182696372270584, 'timestamp': '2025-09-30 22:13:32.627996', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:32.695033', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.1563013643026352, 'timestamp': '2025-09-30 22:13:32.698438', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:32.764272', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.28236889839172363, 'timestamp': '2025-09-30 22:13:32.773505', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:32.844282', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.2972184717655182, 'timestamp': '2025-09-30 22:13:32.847960', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:32.909710', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.15116369724273682, 'timestamp': '2025-09-30 22:13:32.917510', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:32.973386', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.260820209980011, 'timestamp': '2025-09-30 22:13:32.976206', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:33.036961', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.15201547741889954, 'timestamp': '2025-09-30 22:13:33.040946', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:33.101097', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.1298362761735916, 'timestamp': '2025-09-30 22:13:33.104024', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:33.170962', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.14125913381576538, 'timestamp': '2025-09-30 22:13:33.177397', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:33.237884', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.2286285161972046, 'timestamp': '2025-09-30 22:13:33.240694', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:33.296590', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.19471605122089386, 'timestamp': '2025-09-30 22:13:33.299520', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:33.364380', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.1670805811882019, 'timestamp': '2025-09-30 22:13:33.367365', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:33.424165', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.19624383747577667, 'timestamp': '2025-09-30 22:13:33.438386', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:33.494763', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.10528451949357986, 'timestamp': '2025-09-30 22:13:33.499318', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:33.559156', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.16006359457969666, 'timestamp': '2025-09-30 22:13:33.561654', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:33.619617', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.14112700521945953, 'timestamp': '2025-09-30 22:13:33.622127', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:33.679201', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.20862485468387604, 'timestamp': '2025-09-30 22:13:33.686018', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:33.745379', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.24751676619052887, 'timestamp': '2025-09-30 22:13:33.750986', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:33.809403', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.15317946672439575, 'timestamp': '2025-09-30 22:13:33.820230', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:33.878065', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.2340957075357437, 'timestamp': '2025-09-30 22:13:33.880538', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:33.938732', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.15404406189918518, 'timestamp': '2025-09-30 22:13:33.946272', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:34.002482', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.14667841792106628, 'timestamp': '2025-09-30 22:13:34.007815', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:34.065074', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.2276759147644043, 'timestamp': '2025-09-30 22:13:34.067611', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:13:34.132025', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.124529168009758, 'timestamp': '2025-09-30 22:13:34.134591', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:34.211892', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.18992789089679718, 'timestamp': '2025-09-30 22:13:34.219161', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:34.274310', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.1728891134262085, 'timestamp': '2025-09-30 22:13:34.276772', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:34.333309', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.2688790559768677, 'timestamp': '2025-09-30 22:13:34.336499', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:34.392683', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.28036344051361084, 'timestamp': '2025-09-30 22:13:34.395221', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:34.451007', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.15828822553157806, 'timestamp': '2025-09-30 22:13:34.459297', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:34.516606', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.14251922070980072, 'timestamp': '2025-09-30 22:13:34.521481', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:34.580184', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.15697482228279114, 'timestamp': '2025-09-30 22:13:34.582702', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:34.640215', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.3445532023906708, 'timestamp': '2025-09-30 22:13:34.642886', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:34.701515', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.1345483958721161, 'timestamp': '2025-09-30 22:13:34.713263', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:34.769728', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.1844899207353592, 'timestamp': '2025-09-30 22:13:34.773773', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:34.830499', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.18339109420776367, 'timestamp': '2025-09-30 22:13:34.833976', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:34.889948', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.20003929734230042, 'timestamp': '2025-09-30 22:13:34.892410', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:34.949596', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.20001806318759918, 'timestamp': '2025-09-30 22:13:34.956856', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:35.012203', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.11031382530927658, 'timestamp': '2025-09-30 22:13:35.018532', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:35.075912', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.17659924924373627, 'timestamp': '2025-09-30 22:13:35.085785', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:35.142434', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.19305554032325745, 'timestamp': '2025-09-30 22:13:35.144755', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:35.200671', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.10652148723602295, 'timestamp': '2025-09-30 22:13:35.206895', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:35.265387', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.24778905510902405, 'timestamp': '2025-09-30 22:13:35.268396', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.324927', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.19408337771892548, 'timestamp': '2025-09-30 22:13:35.329305', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.385980', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.0986558049917221, 'timestamp': '2025-09-30 22:13:35.389053', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.446958', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.12676288187503815, 'timestamp': '2025-09-30 22:13:35.452898', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:35.510912', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.2816906273365021, 'timestamp': '2025-09-30 22:13:35.520879', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:35.580097', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.20526006817817688, 'timestamp': '2025-09-30 22:13:35.586149', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.644529', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.19502800703048706, 'timestamp': '2025-09-30 22:13:35.655600', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.713481', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.19695843756198883, 'timestamp': '2025-09-30 22:13:35.728450', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:35.784640', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.14099062979221344, 'timestamp': '2025-09-30 22:13:35.787904', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:35.844973', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.23065468668937683, 'timestamp': '2025-09-30 22:13:35.849046', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.905855', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.19772589206695557, 'timestamp': '2025-09-30 22:13:35.910449', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:35.968887', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.2351270169019699, 'timestamp': '2025-09-30 22:13:35.975492', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.031950', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.1681683510541916, 'timestamp': '2025-09-30 22:13:36.035532', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:36.097265', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.17844843864440918, 'timestamp': '2025-09-30 22:13:36.099767', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.155782', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.19034771621227264, 'timestamp': '2025-09-30 22:13:36.158758', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.222888', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.168262779712677, 'timestamp': '2025-09-30 22:13:36.230070', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:36.291294', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.17501413822174072, 'timestamp': '2025-09-30 22:13:36.297931', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:36.360144', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.1573697179555893, 'timestamp': '2025-09-30 22:13:36.363190', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.419123', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.13771136105060577, 'timestamp': '2025-09-30 22:13:36.422209', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.482013', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.19367565214633942, 'timestamp': '2025-09-30 22:13:36.489177', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.545729', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.18307356536388397, 'timestamp': '2025-09-30 22:13:36.550744', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:36.607975', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.1542428433895111, 'timestamp': '2025-09-30 22:13:36.611788', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.668475', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.15291395783424377, 'timestamp': '2025-09-30 22:13:36.673276', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.730046', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.27073562145233154, 'timestamp': '2025-09-30 22:13:36.742126', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:36.799240', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.17495501041412354, 'timestamp': '2025-09-30 22:13:36.802618', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:36.858853', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.1988544911146164, 'timestamp': '2025-09-30 22:13:36.862291', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:36.918654', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.09902257472276688, 'timestamp': '2025-09-30 22:13:36.921378', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:36.976900', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.10507054626941681, 'timestamp': '2025-09-30 22:13:36.983535', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:37.039419', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.13417625427246094, 'timestamp': '2025-09-30 22:13:37.042773', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:37.098961', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.23196107149124146, 'timestamp': '2025-09-30 22:13:37.101747', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:37.162543', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.14560258388519287, 'timestamp': '2025-09-30 22:13:37.165493', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:37.223451', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.12088099867105484, 'timestamp': '2025-09-30 22:13:37.230255', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:37.285968', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.13481546938419342, 'timestamp': '2025-09-30 22:13:37.289345', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:37.353342', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.17365635931491852, 'timestamp': '2025-09-30 22:13:37.356987', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:37.416024', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.1736636757850647, 'timestamp': '2025-09-30 22:13:37.422352', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:37.478208', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.22020995616912842, 'timestamp': '2025-09-30 22:13:37.484608', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:37.540083', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.1349860429763794, 'timestamp': '2025-09-30 22:13:37.543224', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:37.603381', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.13246475160121918, 'timestamp': '2025-09-30 22:13:37.606903', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:37.663117', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.12762734293937683, 'timestamp': '2025-09-30 22:13:37.666166', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:37.722412', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.167533278465271, 'timestamp': '2025-09-30 22:13:37.735357', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:37.791833', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.12935954332351685, 'timestamp': '2025-09-30 22:13:37.795425', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:37.851524', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.2111167460680008, 'timestamp': '2025-09-30 22:13:37.857531', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:37.914712', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.14479458332061768, 'timestamp': '2025-09-30 22:13:37.921346', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:37.977677', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.20854507386684418, 'timestamp': '2025-09-30 22:13:37.984159', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.043932', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.23647964000701904, 'timestamp': '2025-09-30 22:13:38.046735', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:38.103686', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.25068268179893494, 'timestamp': '2025-09-30 22:13:38.109480', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:38.172634', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.2182687222957611, 'timestamp': '2025-09-30 22:13:38.177061', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.233756', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.16749918460845947, 'timestamp': '2025-09-30 22:13:38.245441', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.303738', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.2328408658504486, 'timestamp': '2025-09-30 22:13:38.307453', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.364026', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.19995105266571045, 'timestamp': '2025-09-30 22:13:38.370987', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.429168', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.20814910531044006, 'timestamp': '2025-09-30 22:13:38.432314', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:38.489773', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.18741989135742188, 'timestamp': '2025-09-30 22:13:38.496466', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:38.553673', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.2893431484699249, 'timestamp': '2025-09-30 22:13:38.556996', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.614207', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.1718897819519043, 'timestamp': '2025-09-30 22:13:38.621043', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:38.676643', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.16774488985538483, 'timestamp': '2025-09-30 22:13:38.682825', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:38.743888', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.21975617110729218, 'timestamp': '2025-09-30 22:13:38.757427', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:38.814036', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.15878093242645264, 'timestamp': '2025-09-30 22:13:38.819038', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:38.875995', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.08907103538513184, 'timestamp': '2025-09-30 22:13:38.878558', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:38.942405', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.16614381968975067, 'timestamp': '2025-09-30 22:13:38.945356', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:39.002088', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.12969587743282318, 'timestamp': '2025-09-30 22:13:39.009566', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.066051', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.20694226026535034, 'timestamp': '2025-09-30 22:13:39.073987', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:39.132873', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.21542461216449738, 'timestamp': '2025-09-30 22:13:39.135968', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:39.193527', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.1577359139919281, 'timestamp': '2025-09-30 22:13:39.198078', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.254032', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.21466873586177826, 'timestamp': '2025-09-30 22:13:39.261819', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.317503', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.1642138957977295, 'timestamp': '2025-09-30 22:13:39.321856', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.379226', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.23354580998420715, 'timestamp': '2025-09-30 22:13:39.385115', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:39.443950', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.13018424808979034, 'timestamp': '2025-09-30 22:13:39.449562', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.507500', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.18990586698055267, 'timestamp': '2025-09-30 22:13:39.517003', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.585044', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.27506792545318604, 'timestamp': '2025-09-30 22:13:39.595054', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:39.655667', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.2764357626438141, 'timestamp': '2025-09-30 22:13:39.671350', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.729617', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.17082937061786652, 'timestamp': '2025-09-30 22:13:39.733949', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:39.799327', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.14654140174388885, 'timestamp': '2025-09-30 22:13:39.812607', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:39.870461', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.17371809482574463, 'timestamp': '2025-09-30 22:13:39.875356', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:39.934604', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.10540507733821869, 'timestamp': '2025-09-30 22:13:39.939660', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:39.999913', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.1350107640028, 'timestamp': '2025-09-30 22:13:40.017702', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:40.076072', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.15328778326511383, 'timestamp': '2025-09-30 22:13:40.097308', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:40.157115', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.18113212287425995, 'timestamp': '2025-09-30 22:13:40.164128', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:40.225901', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.19580698013305664, 'timestamp': '2025-09-30 22:13:40.231657', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:40.290649', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.16239303350448608, 'timestamp': '2025-09-30 22:13:40.294523', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:40.354094', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.2522003650665283, 'timestamp': '2025-09-30 22:13:40.360851', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:40.418212', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.23612280189990997, 'timestamp': '2025-09-30 22:13:40.422012', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:13:40.479195', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.1896667182445526, 'timestamp': '2025-09-30 22:13:40.484121', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:40.543425', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.13975799083709717, 'timestamp': '2025-09-30 22:13:40.548418', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:40.608565', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.21059224009513855, 'timestamp': '2025-09-30 22:13:40.615748', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:40.692375', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.16023828089237213, 'timestamp': '2025-09-30 22:13:40.696950', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:13:40.759133', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.12563589215278625, 'timestamp': '2025-09-30 22:13:40.763416', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:40.828148', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.14965492486953735, 'timestamp': '2025-09-30 22:13:40.844006', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:40.904839', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.22926391661167145, 'timestamp': '2025-09-30 22:13:40.914232', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:40.973650', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.15699470043182373, 'timestamp': '2025-09-30 22:13:40.977135', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.042552', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.09674070030450821, 'timestamp': '2025-09-30 22:13:41.046395', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.107020', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.1662796437740326, 'timestamp': '2025-09-30 22:13:41.110759', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.173061', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.2251470386981964, 'timestamp': '2025-09-30 22:13:41.180046', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.236770', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.14758171141147614, 'timestamp': '2025-09-30 22:13:41.242185', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.308751', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.16380146145820618, 'timestamp': '2025-09-30 22:13:41.317538', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:41.380917', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.12513893842697144, 'timestamp': '2025-09-30 22:13:41.383556', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:41.444727', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.14057621359825134, 'timestamp': '2025-09-30 22:13:41.451159', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:13:41.514210', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.21863797307014465, 'timestamp': '2025-09-30 22:13:41.521427', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.577450', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.18586218357086182, 'timestamp': '2025-09-30 22:13:41.581319', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.639494', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.1830807328224182, 'timestamp': '2025-09-30 22:13:41.646535', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.708589', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.1036299392580986, 'timestamp': '2025-09-30 22:13:41.716256', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.781799', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.15674616396427155, 'timestamp': '2025-09-30 22:13:41.784207', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.842886', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.07879795134067535, 'timestamp': '2025-09-30 22:13:41.845972', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:41.902170', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.16468533873558044, 'timestamp': '2025-09-30 22:13:41.906014', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:41.962973', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.1792893409729004, 'timestamp': '2025-09-30 22:13:41.969608', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:42.028418', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.23700574040412903, 'timestamp': '2025-09-30 22:13:42.031308', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:42.090215', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.2179095596075058, 'timestamp': '2025-09-30 22:13:42.095220', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:42.154550', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.14568710327148438, 'timestamp': '2025-09-30 22:13:42.159992', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.220904', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.20466236770153046, 'timestamp': '2025-09-30 22:13:42.228409', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:42.284597', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.11546346545219421, 'timestamp': '2025-09-30 22:13:42.293987', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:13:42.352639', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.2278895229101181, 'timestamp': '2025-09-30 22:13:42.360990', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.422839', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.16552697122097015, 'timestamp': '2025-09-30 22:13:42.425775', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.482156', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.2253979593515396, 'timestamp': '2025-09-30 22:13:42.492884', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:42.552191', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.14160700142383575, 'timestamp': '2025-09-30 22:13:42.556639', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.614047', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.1638268232345581, 'timestamp': '2025-09-30 22:13:42.622935', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:42.683677', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.16272705793380737, 'timestamp': '2025-09-30 22:13:42.687490', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:42.744095', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.19698911905288696, 'timestamp': '2025-09-30 22:13:42.754472', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.815560', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.16297265887260437, 'timestamp': '2025-09-30 22:13:42.818598', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:42.875729', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.20233942568302155, 'timestamp': '2025-09-30 22:13:42.879120', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:42.936446', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.14898230135440826, 'timestamp': '2025-09-30 22:13:42.939471', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:42.995764', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.188101127743721, 'timestamp': '2025-09-30 22:13:43.003369', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:43.059469', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.20079800486564636, 'timestamp': '2025-09-30 22:13:43.067758', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.126863', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.16472026705741882, 'timestamp': '2025-09-30 22:13:43.132179', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.192034', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.18177451193332672, 'timestamp': '2025-09-30 22:13:43.198266', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:43.259496', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.19019636511802673, 'timestamp': '2025-09-30 22:13:43.265729', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.322441', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.24934346973896027, 'timestamp': '2025-09-30 22:13:43.325936', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:43.383005', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.1376076340675354, 'timestamp': '2025-09-30 22:13:43.385880', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.442330', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.16909140348434448, 'timestamp': '2025-09-30 22:13:43.445539', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.505637', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.13829255104064941, 'timestamp': '2025-09-30 22:13:43.513210', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:43.570077', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.14801724255084991, 'timestamp': '2025-09-30 22:13:43.580368', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:43.641573', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.0838601365685463, 'timestamp': '2025-09-30 22:13:43.644812', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:43.712359', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.10216476768255234, 'timestamp': '2025-09-30 22:13:43.721007', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:43.781318', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.14029930531978607, 'timestamp': '2025-09-30 22:13:43.791187', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:43.846263', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.14679792523384094, 'timestamp': '2025-09-30 22:13:43.850734', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:43.906642', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.19037102162837982, 'timestamp': '2025-09-30 22:13:43.911803', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:43.968993', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.18369722366333008, 'timestamp': '2025-09-30 22:13:43.972926', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:44.030181', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.3208064138889313, 'timestamp': '2025-09-30 22:13:44.039867', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:13:58.337267', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 10891.538735533548, 'timestamp': '2025-09-30 22:13:58.353472', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.415720', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.18450863659381866, 'timestamp': '2025-09-30 22:13:58.421276', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:58.479799', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.25267890095710754, 'timestamp': '2025-09-30 22:13:58.482165', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.542008', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.08882492780685425, 'timestamp': '2025-09-30 22:13:58.544174', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:58.601646', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.26314425468444824, 'timestamp': '2025-09-30 22:13:58.608245', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.664785', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.1269337385892868, 'timestamp': '2025-09-30 22:13:58.667682', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.724007', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.18090268969535828, 'timestamp': '2025-09-30 22:13:58.726998', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.783743', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.2102673500776291, 'timestamp': '2025-09-30 22:13:58.790255', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:58.851903', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.16977424919605255, 'timestamp': '2025-09-30 22:13:58.865411', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:58.923067', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.24258515238761902, 'timestamp': '2025-09-30 22:13:58.928176', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:13:58.986151', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.13929827511310577, 'timestamp': '2025-09-30 22:13:58.988906', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.046848', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.17636260390281677, 'timestamp': '2025-09-30 22:13:59.049485', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:59.108376', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.14298951625823975, 'timestamp': '2025-09-30 22:13:59.124354', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:59.184326', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.1943269520998001, 'timestamp': '2025-09-30 22:13:59.188262', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:59.244612', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.1356787383556366, 'timestamp': '2025-09-30 22:13:59.247879', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:59.308667', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12175050377845764, 'timestamp': '2025-09-30 22:13:59.312446', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:59.373732', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.19405321776866913, 'timestamp': '2025-09-30 22:13:59.380737', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.439606', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.2175620049238205, 'timestamp': '2025-09-30 22:13:59.443356', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.507302', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.10249370336532593, 'timestamp': '2025-09-30 22:13:59.513384', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.584872', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.1815357804298401, 'timestamp': '2025-09-30 22:13:59.596245', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.655840', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.174319788813591, 'timestamp': '2025-09-30 22:13:59.670424', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:13:59.735736', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.14902997016906738, 'timestamp': '2025-09-30 22:13:59.739517', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:13:59.802186', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.16637277603149414, 'timestamp': '2025-09-30 22:13:59.814086', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:59.893257', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.14537377655506134, 'timestamp': '2025-09-30 22:13:59.898177', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:13:59.956166', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.22520649433135986, 'timestamp': '2025-09-30 22:13:59.978130', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:00.065455', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.13137662410736084, 'timestamp': '2025-09-30 22:14:00.068812', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:00.141667', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.24019886553287506, 'timestamp': '2025-09-30 22:14:00.156944', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:00.225075', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.177150696516037, 'timestamp': '2025-09-30 22:14:00.233968', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:00.293153', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.22165103256702423, 'timestamp': '2025-09-30 22:14:00.300987', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:00.360128', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.12782399356365204, 'timestamp': '2025-09-30 22:14:00.370029', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:00.427022', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.19980213046073914, 'timestamp': '2025-09-30 22:14:00.432029', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:00.496442', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.16893745958805084, 'timestamp': '2025-09-30 22:14:00.505885', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:00.567997', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.2203923463821411, 'timestamp': '2025-09-30 22:14:00.575477', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:00.632192', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.1547509729862213, 'timestamp': '2025-09-30 22:14:00.645865', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:00.720605', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.12296320497989655, 'timestamp': '2025-09-30 22:14:00.723619', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:00.781154', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.1302366852760315, 'timestamp': '2025-09-30 22:14:00.787918', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:00.847348', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.1927732229232788, 'timestamp': '2025-09-30 22:14:00.853748', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:00.909725', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.2129630595445633, 'timestamp': '2025-09-30 22:14:00.915008', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:00.971867', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.29818883538246155, 'timestamp': '2025-09-30 22:14:00.979409', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:01.045987', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.2443869411945343, 'timestamp': '2025-09-30 22:14:01.051521', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:01.108357', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.28055351972579956, 'timestamp': '2025-09-30 22:14:01.119759', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.179128', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.18964658677577972, 'timestamp': '2025-09-30 22:14:01.181930', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.239732', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.21243101358413696, 'timestamp': '2025-09-30 22:14:01.242913', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:01.299835', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.17910175025463104, 'timestamp': '2025-09-30 22:14:01.302745', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:01.360309', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.23291577398777008, 'timestamp': '2025-09-30 22:14:01.367993', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:01.427363', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.20273296535015106, 'timestamp': '2025-09-30 22:14:01.433612', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:01.491043', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.21213595569133759, 'timestamp': '2025-09-30 22:14:01.499279', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.562778', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.16787700355052948, 'timestamp': '2025-09-30 22:14:01.572352', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.636397', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.25241920351982117, 'timestamp': '2025-09-30 22:14:01.642755', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.705192', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.14937733113765717, 'timestamp': '2025-09-30 22:14:01.711952', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:01.773429', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.2172737419605255, 'timestamp': '2025-09-30 22:14:01.778850', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:01.835373', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.26334843039512634, 'timestamp': '2025-09-30 22:14:01.838763', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:01.898347', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.15752816200256348, 'timestamp': '2025-09-30 22:14:01.909725', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:01.967001', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.15537038445472717, 'timestamp': '2025-09-30 22:14:01.971000', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:02.028476', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.19592544436454773, 'timestamp': '2025-09-30 22:14:02.031921', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.101981', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.15343470871448517, 'timestamp': '2025-09-30 22:14:02.104088', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:02.165881', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.20066939294338226, 'timestamp': '2025-09-30 22:14:02.174643', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:02.235432', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.24681071937084198, 'timestamp': '2025-09-30 22:14:02.241074', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:02.304136', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14581221342086792, 'timestamp': '2025-09-30 22:14:02.307148', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:02.365018', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.11462324112653732, 'timestamp': '2025-09-30 22:14:02.368750', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:02.429182', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.226763516664505, 'timestamp': '2025-09-30 22:14:02.435946', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.495546', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.22385744750499725, 'timestamp': '2025-09-30 22:14:02.498907', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:02.555901', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.24096707999706268, 'timestamp': '2025-09-30 22:14:02.563041', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:02.622939', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.12425899505615234, 'timestamp': '2025-09-30 22:14:02.628160', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.685449', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.24022617936134338, 'timestamp': '2025-09-30 22:14:02.695286', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.755573', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.22559724748134613, 'timestamp': '2025-09-30 22:14:02.758213', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.817083', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.1832108050584793, 'timestamp': '2025-09-30 22:14:02.821792', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:02.878731', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.19919325411319733, 'timestamp': '2025-09-30 22:14:02.884749', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:02.943391', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.1254982203245163, 'timestamp': '2025-09-30 22:14:02.951454', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.008103', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.21985089778900146, 'timestamp': '2025-09-30 22:14:03.010664', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.067517', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.18428748846054077, 'timestamp': '2025-09-30 22:14:03.070447', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:03.127943', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12712237238883972, 'timestamp': '2025-09-30 22:14:03.134409', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.191323', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.2507637143135071, 'timestamp': '2025-09-30 22:14:03.199818', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:03.258497', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.15808485448360443, 'timestamp': '2025-09-30 22:14:03.265172', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.336348', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.26290562748908997, 'timestamp': '2025-09-30 22:14:03.342179', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.401715', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.19145672023296356, 'timestamp': '2025-09-30 22:14:03.407877', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.465318', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.16932158172130585, 'timestamp': '2025-09-30 22:14:03.471629', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.530108', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.26567158102989197, 'timestamp': '2025-09-30 22:14:03.532698', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:03.592147', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.19531942903995514, 'timestamp': '2025-09-30 22:14:03.597756', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.659507', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.15786601603031158, 'timestamp': '2025-09-30 22:14:03.662232', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.720471', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.2294335961341858, 'timestamp': '2025-09-30 22:14:03.728768', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.784836', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.28102633357048035, 'timestamp': '2025-09-30 22:14:03.788438', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.845894', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.14540308713912964, 'timestamp': '2025-09-30 22:14:03.848532', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:03.905167', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.1875111311674118, 'timestamp': '2025-09-30 22:14:03.910467', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:03.967572', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.18106120824813843, 'timestamp': '2025-09-30 22:14:03.978181', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.041559', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.31279653310775757, 'timestamp': '2025-09-30 22:14:04.045073', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.106480', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.17606495320796967, 'timestamp': '2025-09-30 22:14:04.108975', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:04.166018', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.2857856750488281, 'timestamp': '2025-09-30 22:14:04.168924', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.228906', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.12816426157951355, 'timestamp': '2025-09-30 22:14:04.235186', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:04.291969', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.16238634288311005, 'timestamp': '2025-09-30 22:14:04.294293', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.351470', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.10720708221197128, 'timestamp': '2025-09-30 22:14:04.354772', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:04.437684', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.13968054950237274, 'timestamp': '2025-09-30 22:14:04.443207', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:04.502079', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.1561409831047058, 'timestamp': '2025-09-30 22:14:04.511484', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.583100', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.2045622617006302, 'timestamp': '2025-09-30 22:14:04.593534', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.655972', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.1955234408378601, 'timestamp': '2025-09-30 22:14:04.660829', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:04.718747', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.18016807734966278, 'timestamp': '2025-09-30 22:14:04.725659', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:04.788294', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.19751092791557312, 'timestamp': '2025-09-30 22:14:04.800136', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:04.864932', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.13919584453105927, 'timestamp': '2025-09-30 22:14:04.873909', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:04.934376', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.15692360699176788, 'timestamp': '2025-09-30 22:14:04.936636', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:05.003587', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.14085029065608978, 'timestamp': '2025-09-30 22:14:05.006076', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:05.080016', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.12800823152065277, 'timestamp': '2025-09-30 22:14:05.087527', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.147420', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.15445856750011444, 'timestamp': '2025-09-30 22:14:05.151418', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.207811', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.12347297370433807, 'timestamp': '2025-09-30 22:14:05.210748', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.268037', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.3579752743244171, 'timestamp': '2025-09-30 22:14:05.276417', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.340377', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.1509895920753479, 'timestamp': '2025-09-30 22:14:05.346240', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:05.407533', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.17290736734867096, 'timestamp': '2025-09-30 22:14:05.410570', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:05.467333', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.14729496836662292, 'timestamp': '2025-09-30 22:14:05.469625', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:05.526195', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.2501665949821472, 'timestamp': '2025-09-30 22:14:05.529570', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:05.588114', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.10820262134075165, 'timestamp': '2025-09-30 22:14:05.595915', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:05.652956', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.1836611032485962, 'timestamp': '2025-09-30 22:14:05.656547', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:05.718293', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.2157028317451477, 'timestamp': '2025-09-30 22:14:05.721696', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.778436', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.15599799156188965, 'timestamp': '2025-09-30 22:14:05.780776', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:05.849089', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.1489897072315216, 'timestamp': '2025-09-30 22:14:05.855400', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:05.912057', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.18040867149829865, 'timestamp': '2025-09-30 22:14:05.916061', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:05.988831', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.16653195023536682, 'timestamp': '2025-09-30 22:14:05.991543', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:06.051300', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.24878311157226562, 'timestamp': '2025-09-30 22:14:06.055977', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.118071', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.1508757770061493, 'timestamp': '2025-09-30 22:14:06.124025', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.180120', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.18779125809669495, 'timestamp': '2025-09-30 22:14:06.182912', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:06.240920', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.2639533579349518, 'timestamp': '2025-09-30 22:14:06.245955', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:06.304166', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.23166696727275848, 'timestamp': '2025-09-30 22:14:06.309203', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:06.366020', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24653321504592896, 'timestamp': '2025-09-30 22:14:06.371974', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.430096', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.16584183275699615, 'timestamp': '2025-09-30 22:14:06.432460', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:06.489751', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.13503265380859375, 'timestamp': '2025-09-30 22:14:06.495155', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.551728', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.2480970025062561, 'timestamp': '2025-09-30 22:14:06.555269', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.613972', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.22822090983390808, 'timestamp': '2025-09-30 22:14:06.620186', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:06.679328', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.2897017300128937, 'timestamp': '2025-09-30 22:14:06.682625', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:06.738947', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.1488199532032013, 'timestamp': '2025-09-30 22:14:06.741416', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:06.799004', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.16480042040348053, 'timestamp': '2025-09-30 22:14:06.801878', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.862921', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.16731266677379608, 'timestamp': '2025-09-30 22:14:06.869591', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:06.925438', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.17312686145305634, 'timestamp': '2025-09-30 22:14:06.928445', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:06.985400', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19486260414123535, 'timestamp': '2025-09-30 22:14:06.988190', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:07.044955', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.22862428426742554, 'timestamp': '2025-09-30 22:14:07.047541', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:07.103531', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.18114504218101501, 'timestamp': '2025-09-30 22:14:07.109645', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:07.164908', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.21318978071212769, 'timestamp': '2025-09-30 22:14:07.167395', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:07.223191', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.2641870081424713, 'timestamp': '2025-09-30 22:14:07.225576', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:07.284111', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.1600443720817566, 'timestamp': '2025-09-30 22:14:07.286538', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:07.342937', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.17599047720432281, 'timestamp': '2025-09-30 22:14:07.349949', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-30 22:14:07.818597', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:07.879019', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.14458097517490387, 'timestamp': '2025-09-30 22:14:07.882610', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:07.942366', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.22817756235599518, 'timestamp': '2025-09-30 22:14:07.945263', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.003596', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.18136891722679138, 'timestamp': '2025-09-30 22:14:08.007623', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:08.065894', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.22036348283290863, 'timestamp': '2025-09-30 22:14:08.072123', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:08.131307', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.1325026899576187, 'timestamp': '2025-09-30 22:14:08.134143', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.191118', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.16190600395202637, 'timestamp': '2025-09-30 22:14:08.193710', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:08.251218', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.21987204253673553, 'timestamp': '2025-09-30 22:14:08.253376', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:08.314055', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.18179510533809662, 'timestamp': '2025-09-30 22:14:08.321857', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.378593', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.14109990000724792, 'timestamp': '2025-09-30 22:14:08.381183', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.438975', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.20067311823368073, 'timestamp': '2025-09-30 22:14:08.441973', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:08.499465', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.10674848407506943, 'timestamp': '2025-09-30 22:14:08.502255', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.559871', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.15043361485004425, 'timestamp': '2025-09-30 22:14:08.566710', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:08.628599', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.27609845995903015, 'timestamp': '2025-09-30 22:14:08.631808', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:08.692894', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.23636561632156372, 'timestamp': '2025-09-30 22:14:08.695828', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:08.755296', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.13942237198352814, 'timestamp': '2025-09-30 22:14:08.763023', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:08.820966', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.12451283633708954, 'timestamp': '2025-09-30 22:14:08.827505', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:08.883895', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.11448613554239273, 'timestamp': '2025-09-30 22:14:08.892538', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:08.949837', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.21191807091236115, 'timestamp': '2025-09-30 22:14:08.952735', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.012387', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.18121175467967987, 'timestamp': '2025-09-30 22:14:09.016942', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.076332', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.2735216021537781, 'timestamp': '2025-09-30 22:14:09.082545', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.144630', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.1774701327085495, 'timestamp': '2025-09-30 22:14:09.147357', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.204850', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.16255256533622742, 'timestamp': '2025-09-30 22:14:09.207458', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.268270', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.2352248579263687, 'timestamp': '2025-09-30 22:14:09.270598', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:09.329208', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.1665632277727127, 'timestamp': '2025-09-30 22:14:09.336994', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:09.393908', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.2343018651008606, 'timestamp': '2025-09-30 22:14:09.397216', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:09.456581', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.2235213965177536, 'timestamp': '2025-09-30 22:14:09.459173', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.516051', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.1764640361070633, 'timestamp': '2025-09-30 22:14:09.518773', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.575920', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.13742081820964813, 'timestamp': '2025-09-30 22:14:09.584894', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.641897', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.16095615923404694, 'timestamp': '2025-09-30 22:14:09.644477', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.705904', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.1700524389743805, 'timestamp': '2025-09-30 22:14:09.709244', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:09.766545', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.12228728830814362, 'timestamp': '2025-09-30 22:14:09.773064', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:09.830924', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.14322106540203094, 'timestamp': '2025-09-30 22:14:09.837770', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:09.894847', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.2837720215320587, 'timestamp': '2025-09-30 22:14:09.897394', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:09.953838', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.1521352082490921, 'timestamp': '2025-09-30 22:14:09.959760', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:10.017052', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.09718123078346252, 'timestamp': '2025-09-30 22:14:10.019936', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:10.076050', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.2319791316986084, 'timestamp': '2025-09-30 22:14:10.082989', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.143665', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.16819487512111664, 'timestamp': '2025-09-30 22:14:10.150893', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.208190', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.27481940388679504, 'timestamp': '2025-09-30 22:14:10.216423', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.278045', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.21605414152145386, 'timestamp': '2025-09-30 22:14:10.282568', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.339472', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.13816939294338226, 'timestamp': '2025-09-30 22:14:10.346440', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.402266', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.17277024686336517, 'timestamp': '2025-09-30 22:14:10.405160', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.461186', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.13699764013290405, 'timestamp': '2025-09-30 22:14:10.464270', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.524143', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.2351464182138443, 'timestamp': '2025-09-30 22:14:10.529812', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:10.585683', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.10925646126270294, 'timestamp': '2025-09-30 22:14:10.592419', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:10.652151', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.19496138393878937, 'timestamp': '2025-09-30 22:14:10.655433', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.711868', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.09377196431159973, 'timestamp': '2025-09-30 22:14:10.716120', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.772783', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.1672436147928238, 'timestamp': '2025-09-30 22:14:10.776752', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.835219', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.23400743305683136, 'timestamp': '2025-09-30 22:14:10.841923', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:10.900277', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.12460564076900482, 'timestamp': '2025-09-30 22:14:10.905223', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:10.963735', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.1753915399312973, 'timestamp': '2025-09-30 22:14:10.966760', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:11.025011', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.2171512097120285, 'timestamp': '2025-09-30 22:14:11.027396', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:11.083865', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.17523832619190216, 'timestamp': '2025-09-30 22:14:11.090424', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:11.146117', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.2335251420736313, 'timestamp': '2025-09-30 22:14:11.148146', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:11.204802', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.16719849407672882, 'timestamp': '2025-09-30 22:14:11.209622', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:11.271398', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.11780858039855957, 'timestamp': '2025-09-30 22:14:11.274529', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:11.331102', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.1214093342423439, 'timestamp': '2025-09-30 22:14:11.338902', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:11.402007', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.1303054690361023, 'timestamp': '2025-09-30 22:14:11.407009', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:11.464171', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.2168097198009491, 'timestamp': '2025-09-30 22:14:11.466726', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:11.523402', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.12067963182926178, 'timestamp': '2025-09-30 22:14:11.527083', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:11.583603', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.13157495856285095, 'timestamp': '2025-09-30 22:14:11.589545', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:11.647008', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.1384696364402771, 'timestamp': '2025-09-30 22:14:11.650065', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:11.707034', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.13635776937007904, 'timestamp': '2025-09-30 22:14:11.710635', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:11.767974', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.20306706428527832, 'timestamp': '2025-09-30 22:14:11.772255', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:11.829716', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.23595917224884033, 'timestamp': '2025-09-30 22:14:11.836981', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:11.894037', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.1729293018579483, 'timestamp': '2025-09-30 22:14:11.897181', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:11.961751', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.1471395641565323, 'timestamp': '2025-09-30 22:14:11.964764', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:12.023116', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.17245222628116608, 'timestamp': '2025-09-30 22:14:12.028594', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:12.085421', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.14939728379249573, 'timestamp': '2025-09-30 22:14:12.092346', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:12.148445', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.23695966601371765, 'timestamp': '2025-09-30 22:14:12.151576', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.209917', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16471372544765472, 'timestamp': '2025-09-30 22:14:12.212803', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.273527', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.23848101496696472, 'timestamp': '2025-09-30 22:14:12.276581', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:12.331905', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.21000327169895172, 'timestamp': '2025-09-30 22:14:12.338038', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:12.393249', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.1390983909368515, 'timestamp': '2025-09-30 22:14:12.397682', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:12.453740', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.16301029920578003, 'timestamp': '2025-09-30 22:14:12.459135', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:12.515726', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.12666407227516174, 'timestamp': '2025-09-30 22:14:12.518318', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:12.578032', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.1353764683008194, 'timestamp': '2025-09-30 22:14:12.584484', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.640971', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.29704549908638, 'timestamp': '2025-09-30 22:14:12.643471', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:12.699459', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.21307235956192017, 'timestamp': '2025-09-30 22:14:12.702093', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:12.759568', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.12574657797813416, 'timestamp': '2025-09-30 22:14:12.764101', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.825420', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.19528597593307495, 'timestamp': '2025-09-30 22:14:12.831983', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.890263', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.1905476301908493, 'timestamp': '2025-09-30 22:14:12.893525', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:12.950410', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.13868927955627441, 'timestamp': '2025-09-30 22:14:12.953204', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.010341', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.1426984965801239, 'timestamp': '2025-09-30 22:14:13.013676', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.069972', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.1404392123222351, 'timestamp': '2025-09-30 22:14:13.077110', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:13.133945', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.13455159962177277, 'timestamp': '2025-09-30 22:14:13.137039', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:13.197019', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.14481250941753387, 'timestamp': '2025-09-30 22:14:13.200305', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.273034', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.22661352157592773, 'timestamp': '2025-09-30 22:14:13.276248', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:13.332873', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.12444799393415451, 'timestamp': '2025-09-30 22:14:13.339499', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.397276', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.1719779223203659, 'timestamp': '2025-09-30 22:14:13.401039', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.458169', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.12340442091226578, 'timestamp': '2025-09-30 22:14:13.461772', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.518858', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.21180593967437744, 'timestamp': '2025-09-30 22:14:13.521977', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.578262', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.21827250719070435, 'timestamp': '2025-09-30 22:14:13.584683', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:13.642731', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.27589359879493713, 'timestamp': '2025-09-30 22:14:13.649880', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.713592', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.18020428717136383, 'timestamp': '2025-09-30 22:14:13.721453', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.778450', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.1362857073545456, 'timestamp': '2025-09-30 22:14:13.782753', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:13.842590', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11242898553609848, 'timestamp': '2025-09-30 22:14:13.850702', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.914237', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.17850758135318756, 'timestamp': '2025-09-30 22:14:13.918393', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:13.974899', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.2697998881340027, 'timestamp': '2025-09-30 22:14:13.984635', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.043033', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.18616895377635956, 'timestamp': '2025-09-30 22:14:14.048025', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:14.105449', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.14634022116661072, 'timestamp': '2025-09-30 22:14:14.115004', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:14.173199', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.17305460572242737, 'timestamp': '2025-09-30 22:14:14.179910', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:14.238351', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.23973597586154938, 'timestamp': '2025-09-30 22:14:14.241515', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:14.301217', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.18816813826560974, 'timestamp': '2025-09-30 22:14:14.306259', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:14.363831', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.17076362669467926, 'timestamp': '2025-09-30 22:14:14.372105', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.430264', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.1540537029504776, 'timestamp': '2025-09-30 22:14:14.435418', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.491946', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19511425495147705, 'timestamp': '2025-09-30 22:14:14.500610', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:14.562204', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.19886451959609985, 'timestamp': '2025-09-30 22:14:14.565449', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:14.623314', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.19279548525810242, 'timestamp': '2025-09-30 22:14:14.629587', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:14.685581', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.19713681936264038, 'timestamp': '2025-09-30 22:14:14.689503', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.746376', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.11391384154558182, 'timestamp': '2025-09-30 22:14:14.749535', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:14.806204', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.23872099816799164, 'timestamp': '2025-09-30 22:14:14.810537', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:14.868960', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16517889499664307, 'timestamp': '2025-09-30 22:14:14.877273', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.935104', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.14810000360012054, 'timestamp': '2025-09-30 22:14:14.939491', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:14.996958', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.15367606282234192, 'timestamp': '2025-09-30 22:14:15.000848', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:15.057645', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.3050350546836853, 'timestamp': '2025-09-30 22:14:15.062325', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:15.123944', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.24714913964271545, 'timestamp': '2025-09-30 22:14:15.137356', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:15.198710', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.20025724172592163, 'timestamp': '2025-09-30 22:14:15.202214', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:15.265071', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.1554628610610962, 'timestamp': '2025-09-30 22:14:15.269570', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:15.328377', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.2435934990644455, 'timestamp': '2025-09-30 22:14:15.331555', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:15.391362', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.18774884939193726, 'timestamp': '2025-09-30 22:14:15.399137', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:15.473233', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.1323210448026657, 'timestamp': '2025-09-30 22:14:15.475607', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:15.538262', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.1892472356557846, 'timestamp': '2025-09-30 22:14:15.544127', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:15.603976', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.16729618608951569, 'timestamp': '2025-09-30 22:14:15.608927', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:15.670009', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.1579575538635254, 'timestamp': '2025-09-30 22:14:15.680275', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:15.737740', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.16793513298034668, 'timestamp': '2025-09-30 22:14:15.740442', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:15.797005', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.16190746426582336, 'timestamp': '2025-09-30 22:14:15.800900', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:15.860742', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.13154654204845428, 'timestamp': '2025-09-30 22:14:15.866221', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:15.926569', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.30474787950515747, 'timestamp': '2025-09-30 22:14:15.933012', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:16.003363', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.2636187970638275, 'timestamp': '2025-09-30 22:14:16.006247', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:16.062676', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.1707722544670105, 'timestamp': '2025-09-30 22:14:16.068537', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.142631', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.16741427779197693, 'timestamp': '2025-09-30 22:14:16.145510', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.203557', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.2148219645023346, 'timestamp': '2025-09-30 22:14:16.214325', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:16.272275', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.12826745212078094, 'timestamp': '2025-09-30 22:14:16.283505', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.343836', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.12077677249908447, 'timestamp': '2025-09-30 22:14:16.348973', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:16.417524', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.3472428321838379, 'timestamp': '2025-09-30 22:14:16.423878', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:16.482300', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.110147625207901, 'timestamp': '2025-09-30 22:14:16.490342', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.547276', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.1777591109275818, 'timestamp': '2025-09-30 22:14:16.550856', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:16.608057', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.22162888944149017, 'timestamp': '2025-09-30 22:14:16.613492', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:16.672451', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.09365921467542648, 'timestamp': '2025-09-30 22:14:16.677462', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.736627', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.19825322926044464, 'timestamp': '2025-09-30 22:14:16.747803', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:16.805855', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.17107006907463074, 'timestamp': '2025-09-30 22:14:16.812684', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.870927', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.12791158258914948, 'timestamp': '2025-09-30 22:14:16.873402', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:16.936692', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.18002906441688538, 'timestamp': '2025-09-30 22:14:16.939639', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:16.997572', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.17490164935588837, 'timestamp': '2025-09-30 22:14:17.007929', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:17.065359', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.16348613798618317, 'timestamp': '2025-09-30 22:14:17.068445', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:17.126067', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.17187759280204773, 'timestamp': '2025-09-30 22:14:17.129929', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:17.187953', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.13095365464687347, 'timestamp': '2025-09-30 22:14:17.197097', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:17.254158', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.19824615120887756, 'timestamp': '2025-09-30 22:14:17.260768', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:17.318089', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.15721794962882996, 'timestamp': '2025-09-30 22:14:17.320935', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:17.377458', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.12716707587242126, 'timestamp': '2025-09-30 22:14:17.380526', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:17.437989', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.25914743542671204, 'timestamp': '2025-09-30 22:14:17.444265', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:17.499834', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.15297065675258636, 'timestamp': '2025-09-30 22:14:17.507289', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:17.568604', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.10927975922822952, 'timestamp': '2025-09-30 22:14:17.571841', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:17.635975', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.14370541274547577, 'timestamp': '2025-09-30 22:14:17.639768', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:17.696302', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.15212886035442352, 'timestamp': '2025-09-30 22:14:17.699545', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:17.756091', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.23657609522342682, 'timestamp': '2025-09-30 22:14:17.762371', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:17.818208', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.23226210474967957, 'timestamp': '2025-09-30 22:14:17.821147', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:17.878283', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.1496981680393219, 'timestamp': '2025-09-30 22:14:17.881268', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:17.941080', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.25252023339271545, 'timestamp': '2025-09-30 22:14:17.944401', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:18.000927', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.14877578616142273, 'timestamp': '2025-09-30 22:14:18.007851', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.064850', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.1647173911333084, 'timestamp': '2025-09-30 22:14:18.068023', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:18.125026', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.23467794060707092, 'timestamp': '2025-09-30 22:14:18.128143', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:18.187176', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.2351974993944168, 'timestamp': '2025-09-30 22:14:18.190516', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.247998', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.2150208204984665, 'timestamp': '2025-09-30 22:14:18.255134', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.310456', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.1921730637550354, 'timestamp': '2025-09-30 22:14:18.318356', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:18.378445', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.1337611824274063, 'timestamp': '2025-09-30 22:14:18.381214', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:18.437719', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.17115727066993713, 'timestamp': '2025-09-30 22:14:18.440232', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.496145', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.26999592781066895, 'timestamp': '2025-09-30 22:14:18.504564', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.560967', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.2230636030435562, 'timestamp': '2025-09-30 22:14:18.563539', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:18.620939', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.16194434463977814, 'timestamp': '2025-09-30 22:14:18.624899', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:18.681965', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.23483990132808685, 'timestamp': '2025-09-30 22:14:18.686087', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:18.743707', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.3033471703529358, 'timestamp': '2025-09-30 22:14:18.750270', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:18.806389', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.20451121032238007, 'timestamp': '2025-09-30 22:14:18.809398', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:18.866371', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.2066100537776947, 'timestamp': '2025-09-30 22:14:18.869112', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:18.925651', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.2469276487827301, 'timestamp': '2025-09-30 22:14:18.931539', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:18.988237', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.18389688432216644, 'timestamp': '2025-09-30 22:14:18.994907', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:19.062389', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.1447611153125763, 'timestamp': '2025-09-30 22:14:19.065260', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.121828', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.12331198900938034, 'timestamp': '2025-09-30 22:14:19.124614', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.184910', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.12017843872308731, 'timestamp': '2025-09-30 22:14:19.189577', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:19.247175', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.2330736666917801, 'timestamp': '2025-09-30 22:14:19.253459', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:19.309971', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.16634269058704376, 'timestamp': '2025-09-30 22:14:19.312756', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.378902', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.29184016585350037, 'timestamp': '2025-09-30 22:14:19.381919', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.439846', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.11594364792108536, 'timestamp': '2025-09-30 22:14:19.447408', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.503973', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.24798583984375, 'timestamp': '2025-09-30 22:14:19.515316', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.573997', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2650112509727478, 'timestamp': '2025-09-30 22:14:19.577268', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:19.635187', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.21066804230213165, 'timestamp': '2025-09-30 22:14:19.645153', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.702866', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.15745559334754944, 'timestamp': '2025-09-30 22:14:19.706092', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:19.764076', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.2619640529155731, 'timestamp': '2025-09-30 22:14:19.770571', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:19.832513', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.16644833981990814, 'timestamp': '2025-09-30 22:14:19.835718', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:19.892454', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.15342189371585846, 'timestamp': '2025-09-30 22:14:19.897713', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:19.954474', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.15826496481895447, 'timestamp': '2025-09-30 22:14:19.959513', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.017273', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.14878325164318085, 'timestamp': '2025-09-30 22:14:20.026688', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.087499', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.15660257637500763, 'timestamp': '2025-09-30 22:14:20.091269', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.148414', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.18904128670692444, 'timestamp': '2025-09-30 22:14:20.151243', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.211127', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.1914321929216385, 'timestamp': '2025-09-30 22:14:20.214207', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:20.270709', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.18746218085289001, 'timestamp': '2025-09-30 22:14:20.277202', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:20.334291', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.1403132826089859, 'timestamp': '2025-09-30 22:14:20.337231', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:20.396557', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.09418366849422455, 'timestamp': '2025-09-30 22:14:20.402447', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:20.458623', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.18422536551952362, 'timestamp': '2025-09-30 22:14:20.461667', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:20.520748', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.2522680163383484, 'timestamp': '2025-09-30 22:14:20.526662', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.583155', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.12536053359508514, 'timestamp': '2025-09-30 22:14:20.586034', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:14:20.643977', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.23670747876167297, 'timestamp': '2025-09-30 22:14:20.650794', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:20.708892', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.1810762733221054, 'timestamp': '2025-09-30 22:14:20.711842', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:20.771771', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.10276556015014648, 'timestamp': '2025-09-30 22:14:20.781838', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:20.838223', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.1274155229330063, 'timestamp': '2025-09-30 22:14:20.845097', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:20.903848', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.2698012888431549, 'timestamp': '2025-09-30 22:14:20.906188', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:20.969599', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.17531529068946838, 'timestamp': '2025-09-30 22:14:20.975752', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:21.033731', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.23995427787303925, 'timestamp': '2025-09-30 22:14:21.043555', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:21.101612', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.14535564184188843, 'timestamp': '2025-09-30 22:14:21.108125', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:21.165157', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.19039909541606903, 'timestamp': '2025-09-30 22:14:21.172038', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:21.232922', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.3321883976459503, 'timestamp': '2025-09-30 22:14:21.240251', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:21.301859', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.32779955863952637, 'timestamp': '2025-09-30 22:14:21.312925', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:21.368535', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.07419608533382416, 'timestamp': '2025-09-30 22:14:21.372422', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:21.430844', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.22058144211769104, 'timestamp': '2025-09-30 22:14:21.433561', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:21.494327', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.20929203927516937, 'timestamp': '2025-09-30 22:14:21.501123', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:21.562951', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.23557287454605103, 'timestamp': '2025-09-30 22:14:21.569341', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:21.625234', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.11499197036027908, 'timestamp': '2025-09-30 22:14:21.629914', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:21.688255', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.26611068844795227, 'timestamp': '2025-09-30 22:14:21.692916', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:21.754740', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.15059778094291687, 'timestamp': '2025-09-30 22:14:21.758326', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:14:21.824724', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.16060979664325714, 'timestamp': '2025-09-30 22:14:21.831067', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:21.887212', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.21357613801956177, 'timestamp': '2025-09-30 22:14:21.889618', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:21.946525', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.14361922442913055, 'timestamp': '2025-09-30 22:14:21.949664', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.006755', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.24445626139640808, 'timestamp': '2025-09-30 22:14:22.009794', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:22.071626', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.216755211353302, 'timestamp': '2025-09-30 22:14:22.085577', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:22.146496', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.17250461876392365, 'timestamp': '2025-09-30 22:14:22.152017', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.219963', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.23556877672672272, 'timestamp': '2025-09-30 22:14:22.222997', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.279126', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.22534093260765076, 'timestamp': '2025-09-30 22:14:22.282185', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:22.345711', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.13563022017478943, 'timestamp': '2025-09-30 22:14:22.354243', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:22.409798', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.16160278022289276, 'timestamp': '2025-09-30 22:14:22.418392', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:22.476762', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.2225726693868637, 'timestamp': '2025-09-30 22:14:22.479456', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.537132', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.1305561065673828, 'timestamp': '2025-09-30 22:14:22.539860', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:22.614734', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.17517051100730896, 'timestamp': '2025-09-30 22:14:22.622793', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.679270', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.10195706784725189, 'timestamp': '2025-09-30 22:14:22.683622', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:22.743901', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.18876315653324127, 'timestamp': '2025-09-30 22:14:22.750473', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:22.807363', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.19305646419525146, 'timestamp': '2025-09-30 22:14:22.810136', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:22.873364', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.17604820430278778, 'timestamp': '2025-09-30 22:14:22.879539', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:22.934890', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.3646824061870575, 'timestamp': '2025-09-30 22:14:22.937311', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:22.994466', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.1425914168357849, 'timestamp': '2025-09-30 22:14:23.001134', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:23.062942', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.18098503351211548, 'timestamp': '2025-09-30 22:14:23.071381', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:23.129159', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.1260293871164322, 'timestamp': '2025-09-30 22:14:23.139440', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:23.198130', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.2066614031791687, 'timestamp': '2025-09-30 22:14:23.201118', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:23.264631', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.1957346796989441, 'timestamp': '2025-09-30 22:14:23.270738', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:23.327767', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.23347598314285278, 'timestamp': '2025-09-30 22:14:23.334371', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:23.395180', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.10905833542346954, 'timestamp': '2025-09-30 22:14:23.405498', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:23.464010', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.24889391660690308, 'timestamp': '2025-09-30 22:14:23.468139', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:23.524512', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.17911602556705475, 'timestamp': '2025-09-30 22:14:23.527676', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:23.590373', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.23549996316432953, 'timestamp': '2025-09-30 22:14:23.594321', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:23.653494', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.16755348443984985, 'timestamp': '2025-09-30 22:14:23.663038', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:23.719565', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.17238503694534302, 'timestamp': '2025-09-30 22:14:23.722047', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:23.787551', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.18611052632331848, 'timestamp': '2025-09-30 22:14:23.791850', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:23.849167', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.14321838319301605, 'timestamp': '2025-09-30 22:14:23.852534', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:23.916344', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.19617880880832672, 'timestamp': '2025-09-30 22:14:23.929036', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:23.984969', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.1885739266872406, 'timestamp': '2025-09-30 22:14:23.991292', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.051114', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.1491014063358307, 'timestamp': '2025-09-30 22:14:24.053972', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:14:24.110630', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.19906005263328552, 'timestamp': '2025-09-30 22:14:24.113953', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.169731', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.16263529658317566, 'timestamp': '2025-09-30 22:14:24.179299', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.235108', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.21398170292377472, 'timestamp': '2025-09-30 22:14:24.241239', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:24.296767', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.11605452001094818, 'timestamp': '2025-09-30 22:14:24.299960', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.357834', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.2581191658973694, 'timestamp': '2025-09-30 22:14:24.360930', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.417896', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.18563337624073029, 'timestamp': '2025-09-30 22:14:24.426795', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.483746', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.19185324013233185, 'timestamp': '2025-09-30 22:14:24.486915', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:24.544014', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.22729821503162384, 'timestamp': '2025-09-30 22:14:24.546722', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.603633', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.17525741457939148, 'timestamp': '2025-09-30 22:14:24.610354', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:24.670964', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.1444169580936432, 'timestamp': '2025-09-30 22:14:24.677383', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.733394', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.14684930443763733, 'timestamp': '2025-09-30 22:14:24.739389', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.801168', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.15587419271469116, 'timestamp': '2025-09-30 22:14:24.804375', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:24.861272', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.2040926218032837, 'timestamp': '2025-09-30 22:14:24.866172', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.923636', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.14500278234481812, 'timestamp': '2025-09-30 22:14:24.931883', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:24.995886', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.28097185492515564, 'timestamp': '2025-09-30 22:14:24.998777', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.055744', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.21640853583812714, 'timestamp': '2025-09-30 22:14:25.059895', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:25.130183', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.16275250911712646, 'timestamp': '2025-09-30 22:14:25.137867', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:25.200085', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.14289915561676025, 'timestamp': '2025-09-30 22:14:25.206198', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:25.263978', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.2525399923324585, 'timestamp': '2025-09-30 22:14:25.274077', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:25.331986', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.15229199826717377, 'timestamp': '2025-09-30 22:14:25.340848', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.398110', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.21542875468730927, 'timestamp': '2025-09-30 22:14:25.401049', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.457860', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.2161616086959839, 'timestamp': '2025-09-30 22:14:25.467519', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.524562', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.1971510648727417, 'timestamp': '2025-09-30 22:14:25.528037', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.585647', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.24127475917339325, 'timestamp': '2025-09-30 22:14:25.589292', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.646420', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.25238484144210815, 'timestamp': '2025-09-30 22:14:25.650993', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:25.707824', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.20361314713954926, 'timestamp': '2025-09-30 22:14:25.715073', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.778663', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.22796040773391724, 'timestamp': '2025-09-30 22:14:25.782509', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:25.838875', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.2046705186367035, 'timestamp': '2025-09-30 22:14:25.841779', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:25.898658', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.1700257956981659, 'timestamp': '2025-09-30 22:14:25.901637', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:25.957977', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.2263450026512146, 'timestamp': '2025-09-30 22:14:25.968449', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:26.028196', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.12870869040489197, 'timestamp': '2025-09-30 22:14:26.031877', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:26.088772', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.10318754613399506, 'timestamp': '2025-09-30 22:14:26.095912', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:26.156339', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.14581306278705597, 'timestamp': '2025-09-30 22:14:26.159110', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.221358', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.18052217364311218, 'timestamp': '2025-09-30 22:14:26.227688', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:26.283067', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.2501128017902374, 'timestamp': '2025-09-30 22:14:26.286997', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:26.347166', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.21535713970661163, 'timestamp': '2025-09-30 22:14:26.352774', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:26.417928', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.2567717730998993, 'timestamp': '2025-09-30 22:14:26.421148', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.481966', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.16098380088806152, 'timestamp': '2025-09-30 22:14:26.488628', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:26.544574', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.19271701574325562, 'timestamp': '2025-09-30 22:14:26.551997', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.612542', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.1959923952817917, 'timestamp': '2025-09-30 22:14:26.615324', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:26.681361', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.23874437808990479, 'timestamp': '2025-09-30 22:14:26.684099', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.746988', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.14503830671310425, 'timestamp': '2025-09-30 22:14:26.753916', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.810887', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.19996874034404755, 'timestamp': '2025-09-30 22:14:26.814179', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:26.871358', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.12098518759012222, 'timestamp': '2025-09-30 22:14:26.877387', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:26.938248', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.17428600788116455, 'timestamp': '2025-09-30 22:14:26.941760', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:26.999570', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.20458275079727173, 'timestamp': '2025-09-30 22:14:27.007388', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.063393', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.14844150841236115, 'timestamp': '2025-09-30 22:14:27.067207', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.132029', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.17777876555919647, 'timestamp': '2025-09-30 22:14:27.135317', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:27.197137', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.2384628802537918, 'timestamp': '2025-09-30 22:14:27.205341', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.265922', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.176698237657547, 'timestamp': '2025-09-30 22:14:27.273530', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:27.335005', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.1836417019367218, 'timestamp': '2025-09-30 22:14:27.343103', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:27.399859', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.22802814841270447, 'timestamp': '2025-09-30 22:14:27.403145', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.475124', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.14315029978752136, 'timestamp': '2025-09-30 22:14:27.477378', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.536294', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.3215118646621704, 'timestamp': '2025-09-30 22:14:27.543110', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:27.599676', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22831013798713684, 'timestamp': '2025-09-30 22:14:27.602307', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:27.660091', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.21354234218597412, 'timestamp': '2025-09-30 22:14:27.664879', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:27.727248', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.11711598932743073, 'timestamp': '2025-09-30 22:14:27.731515', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:27.788078', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.1707398146390915, 'timestamp': '2025-09-30 22:14:27.793911', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:27.856692', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.13605940341949463, 'timestamp': '2025-09-30 22:14:27.862187', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:27.925439', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.1255962997674942, 'timestamp': '2025-09-30 22:14:27.927900', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:27.985459', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.1432420164346695, 'timestamp': '2025-09-30 22:14:27.989125', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:28.045992', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.13113202154636383, 'timestamp': '2025-09-30 22:14:28.052964', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:28.108187', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.12723393738269806, 'timestamp': '2025-09-30 22:14:28.115214', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:28.171922', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.11297327280044556, 'timestamp': '2025-09-30 22:14:28.174304', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:28.230686', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.1769147664308548, 'timestamp': '2025-09-30 22:14:28.235320', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:14:28.294656', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.1433108150959015, 'timestamp': '2025-09-30 22:14:28.301758', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:28.362205', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.26335740089416504, 'timestamp': '2025-09-30 22:14:28.367456', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:28.428016', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.17386984825134277, 'timestamp': '2025-09-30 22:14:28.430989', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:28.488426', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.16232264041900635, 'timestamp': '2025-09-30 22:14:28.493162', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:28.551240', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.2131822556257248, 'timestamp': '2025-09-30 22:14:28.558749', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:28.616541', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.2280251532793045, 'timestamp': '2025-09-30 22:14:28.626270', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:28.684189', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.10923244804143906, 'timestamp': '2025-09-30 22:14:28.688955', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:28.748235', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.17389364540576935, 'timestamp': '2025-09-30 22:14:28.751095', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:28.808328', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.12438216060400009, 'timestamp': '2025-09-30 22:14:28.820170', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:28.876162', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.15654303133487701, 'timestamp': '2025-09-30 22:14:28.885509', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:28.949096', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.15823310613632202, 'timestamp': '2025-09-30 22:14:28.956636', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:14:43.160010', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 9117.150967221403, 'timestamp': '2025-09-30 22:14:43.189186', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:43.256284', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.17552071809768677, 'timestamp': '2025-09-30 22:14:43.260529', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:43.333444', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.2311042994260788, 'timestamp': '2025-09-30 22:14:43.342303', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:43.421392', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.15859681367874146, 'timestamp': '2025-09-30 22:14:43.426985', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:43.492589', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.22237025201320648, 'timestamp': '2025-09-30 22:14:43.495586', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:43.559351', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.15000778436660767, 'timestamp': '2025-09-30 22:14:43.564058', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:43.636187', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.17315807938575745, 'timestamp': '2025-09-30 22:14:43.645503', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:43.707588', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.16285116970539093, 'timestamp': '2025-09-30 22:14:43.711367', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:43.775091', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.11713363975286484, 'timestamp': '2025-09-30 22:14:43.778535', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:43.845888', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.131397545337677, 'timestamp': '2025-09-30 22:14:43.848840', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:43.929477', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.1630459725856781, 'timestamp': '2025-09-30 22:14:43.945501', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:44.008998', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.21403591334819794, 'timestamp': '2025-09-30 22:14:44.012796', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:44.077620', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.16051550209522247, 'timestamp': '2025-09-30 22:14:44.081648', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:44.145655', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.17057611048221588, 'timestamp': '2025-09-30 22:14:44.150092', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:44.216357', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.1407320350408554, 'timestamp': '2025-09-30 22:14:44.224846', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:44.298200', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.22453346848487854, 'timestamp': '2025-09-30 22:14:44.302621', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:44.373684', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.17522379755973816, 'timestamp': '2025-09-30 22:14:44.385236', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:44.458099', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.15322500467300415, 'timestamp': '2025-09-30 22:14:44.462020', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:44.529061', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.15579064190387726, 'timestamp': '2025-09-30 22:14:44.539574', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:44.602842', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.1843811422586441, 'timestamp': '2025-09-30 22:14:44.606586', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:44.669361', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.17726419866085052, 'timestamp': '2025-09-30 22:14:44.673797', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:44.736083', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.26714882254600525, 'timestamp': '2025-09-30 22:14:44.743403', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:44.805967', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07012800127267838, 'timestamp': '2025-09-30 22:14:44.813809', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:44.886311', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.156734898686409, 'timestamp': '2025-09-30 22:14:44.891748', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:44.955754', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.09755849838256836, 'timestamp': '2025-09-30 22:14:44.960212', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:45.026771', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.25805559754371643, 'timestamp': '2025-09-30 22:14:45.037966', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:45.102279', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.16752387583255768, 'timestamp': '2025-09-30 22:14:45.119593', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:45.212877', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.1777576208114624, 'timestamp': '2025-09-30 22:14:45.218324', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:45.283248', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12508150935173035, 'timestamp': '2025-09-30 22:14:45.286857', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:45.361062', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.17415499687194824, 'timestamp': '2025-09-30 22:14:45.364369', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:45.427754', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.21254004538059235, 'timestamp': '2025-09-30 22:14:45.437088', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:45.499643', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.24726788699626923, 'timestamp': '2025-09-30 22:14:45.509588', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:45.575098', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.20861484110355377, 'timestamp': '2025-09-30 22:14:45.578783', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:45.645613', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.15910978615283966, 'timestamp': '2025-09-30 22:14:45.657017', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:14:45.721937', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.3430810868740082, 'timestamp': '2025-09-30 22:14:45.731650', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:45.805655', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.2481432408094406, 'timestamp': '2025-09-30 22:14:45.809100', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:45.888090', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.2722971737384796, 'timestamp': '2025-09-30 22:14:45.899981', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:45.964178', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.15039776265621185, 'timestamp': '2025-09-30 22:14:45.968858', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.032220', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.19610249996185303, 'timestamp': '2025-09-30 22:14:46.050900', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:46.116622', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.2595427334308624, 'timestamp': '2025-09-30 22:14:46.123125', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:46.187876', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.14672571420669556, 'timestamp': '2025-09-30 22:14:46.203016', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:46.269295', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.13328447937965393, 'timestamp': '2025-09-30 22:14:46.274255', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:46.338870', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12146511673927307, 'timestamp': '2025-09-30 22:14:46.349919', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.415947', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.19212506711483002, 'timestamp': '2025-09-30 22:14:46.428234', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.494169', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.2342701107263565, 'timestamp': '2025-09-30 22:14:46.506678', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:46.570985', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.2478380650281906, 'timestamp': '2025-09-30 22:14:46.582909', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:46.646695', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.19768400490283966, 'timestamp': '2025-09-30 22:14:46.663934', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.727962', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.31971994042396545, 'timestamp': '2025-09-30 22:14:46.732686', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.797122', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.2540847957134247, 'timestamp': '2025-09-30 22:14:46.800945', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:46.865753', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.15994450449943542, 'timestamp': '2025-09-30 22:14:46.871168', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:46.935145', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.14024567604064941, 'timestamp': '2025-09-30 22:14:46.954495', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:47.018908', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.12379652261734009, 'timestamp': '2025-09-30 22:14:47.034076', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:47.110834', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.23366430401802063, 'timestamp': '2025-09-30 22:14:47.135273', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:47.201598', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.1958361119031906, 'timestamp': '2025-09-30 22:14:47.217160', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:47.298749', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.2947039306163788, 'timestamp': '2025-09-30 22:14:47.315679', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:47.387210', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.2022911012172699, 'timestamp': '2025-09-30 22:14:47.398828', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:47.471188', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.2241116464138031, 'timestamp': '2025-09-30 22:14:47.483721', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:47.555791', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.1675366312265396, 'timestamp': '2025-09-30 22:14:47.559634', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:47.640231', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.18302084505558014, 'timestamp': '2025-09-30 22:14:47.648882', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:47.719217', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.1853187382221222, 'timestamp': '2025-09-30 22:14:47.723074', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:47.791222', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.20117558538913727, 'timestamp': '2025-09-30 22:14:47.793912', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:47.852779', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.24543379247188568, 'timestamp': '2025-09-30 22:14:47.862464', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:47.921238', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.22485512495040894, 'timestamp': '2025-09-30 22:14:47.937477', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:47.996356', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.25374528765678406, 'timestamp': '2025-09-30 22:14:48.000640', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.067389', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.12090495973825455, 'timestamp': '2025-09-30 22:14:48.070901', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:48.128975', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.20992963016033173, 'timestamp': '2025-09-30 22:14:48.133041', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:48.199555', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.2110614776611328, 'timestamp': '2025-09-30 22:14:48.217164', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.275478', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.10272719711065292, 'timestamp': '2025-09-30 22:14:48.280296', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:48.338022', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.13136905431747437, 'timestamp': '2025-09-30 22:14:48.348529', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:48.408507', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.1799548715353012, 'timestamp': '2025-09-30 22:14:48.412985', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:48.485408', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.1426241546869278, 'timestamp': '2025-09-30 22:14:48.492802', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.552959', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.18987368047237396, 'timestamp': '2025-09-30 22:14:48.557526', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:48.618529', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.13916711509227753, 'timestamp': '2025-09-30 22:14:48.622887', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.682404', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.17642711102962494, 'timestamp': '2025-09-30 22:14:48.685457', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.757265', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.13613729178905487, 'timestamp': '2025-09-30 22:14:48.765336', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:48.826147', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.1574474722146988, 'timestamp': '2025-09-30 22:14:48.831742', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.915780', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.19137932360172272, 'timestamp': '2025-09-30 22:14:48.920253', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:48.997635', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.18336176872253418, 'timestamp': '2025-09-30 22:14:49.003600', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:49.075788', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.12980696558952332, 'timestamp': '2025-09-30 22:14:49.092168', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:49.161423', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.21059653162956238, 'timestamp': '2025-09-30 22:14:49.167636', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:49.239674', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.220656156539917, 'timestamp': '2025-09-30 22:14:49.249986', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:49.318500', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.2922646999359131, 'timestamp': '2025-09-30 22:14:49.322809', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:49.394552', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.2329447865486145, 'timestamp': '2025-09-30 22:14:49.402890', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:49.461586', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.24512623250484467, 'timestamp': '2025-09-30 22:14:49.464850', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:49.533953', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.20438584685325623, 'timestamp': '2025-09-30 22:14:49.537876', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:49.596311', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.2510455846786499, 'timestamp': '2025-09-30 22:14:49.600546', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:49.662070', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.2280808985233307, 'timestamp': '2025-09-30 22:14:49.670152', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:49.735503', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.15025480091571808, 'timestamp': '2025-09-30 22:14:49.749639', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:49.808126', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.2731279134750366, 'timestamp': '2025-09-30 22:14:49.818461', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:49.881346', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.18573153018951416, 'timestamp': '2025-09-30 22:14:49.885256', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:49.946166', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.18710945546627045, 'timestamp': '2025-09-30 22:14:49.953873', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:50.010862', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.17478260397911072, 'timestamp': '2025-09-30 22:14:50.014054', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:50.071787', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.2404249757528305, 'timestamp': '2025-09-30 22:14:50.081426', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:50.138128', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.19125476479530334, 'timestamp': '2025-09-30 22:14:50.141139', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:50.200061', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.12789086997509003, 'timestamp': '2025-09-30 22:14:50.207554', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.264334', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.1820424497127533, 'timestamp': '2025-09-30 22:14:50.268398', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.325824', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.1830054074525833, 'timestamp': '2025-09-30 22:14:50.328578', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.387791', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.16932716965675354, 'timestamp': '2025-09-30 22:14:50.399755', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:50.457616', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.25831812620162964, 'timestamp': '2025-09-30 22:14:50.466362', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.523367', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.10945400595664978, 'timestamp': '2025-09-30 22:14:50.534933', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:50.603916', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.12477719038724899, 'timestamp': '2025-09-30 22:14:50.607533', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.667795', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.22267696261405945, 'timestamp': '2025-09-30 22:14:50.680539', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.739289', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.17901511490345, 'timestamp': '2025-09-30 22:14:50.755002', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.813327', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.19337151944637299, 'timestamp': '2025-09-30 22:14:50.817144', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:50.874498', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.11698523163795471, 'timestamp': '2025-09-30 22:14:50.878007', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:50.935337', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.23925574123859406, 'timestamp': '2025-09-30 22:14:50.938590', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.000972', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.1125774160027504, 'timestamp': '2025-09-30 22:14:51.018201', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.084118', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.1420993059873581, 'timestamp': '2025-09-30 22:14:51.087437', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.146909', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.14610490202903748, 'timestamp': '2025-09-30 22:14:51.157277', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:51.214334', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.13928532600402832, 'timestamp': '2025-09-30 22:14:51.218124', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:51.280784', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.18392066657543182, 'timestamp': '2025-09-30 22:14:51.287717', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.345358', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.30574551224708557, 'timestamp': '2025-09-30 22:14:51.349387', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.407195', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.21831582486629486, 'timestamp': '2025-09-30 22:14:51.419904', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:51.477436', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.19518496096134186, 'timestamp': '2025-09-30 22:14:51.481889', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:51.541719', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.26850569248199463, 'timestamp': '2025-09-30 22:14:51.549392', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:51.612648', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.08385581523180008, 'timestamp': '2025-09-30 22:14:51.623373', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:51.685777', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.20922794938087463, 'timestamp': '2025-09-30 22:14:51.690912', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:51.765861', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.1345435529947281, 'timestamp': '2025-09-30 22:14:51.770115', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:51.828542', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.1851259469985962, 'timestamp': '2025-09-30 22:14:51.845515', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:51.903170', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.2172396034002304, 'timestamp': '2025-09-30 22:14:51.906855', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:51.964635', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.18100300431251526, 'timestamp': '2025-09-30 22:14:51.968468', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:52.025856', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.09947288781404495, 'timestamp': '2025-09-30 22:14:52.039573', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.097155', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.2522604167461395, 'timestamp': '2025-09-30 22:14:52.113704', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.182557', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.1579332798719406, 'timestamp': '2025-09-30 22:14:52.196696', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:52.257154', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.1393425464630127, 'timestamp': '2025-09-30 22:14:52.271563', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.332063', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.16499358415603638, 'timestamp': '2025-09-30 22:14:52.336977', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:52.398469', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.18746137619018555, 'timestamp': '2025-09-30 22:14:52.405924', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.472522', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.18047365546226501, 'timestamp': '2025-09-30 22:14:52.475813', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.546583', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.15546633303165436, 'timestamp': '2025-09-30 22:14:52.551637', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.611408', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.23098619282245636, 'timestamp': '2025-09-30 22:14:52.624867', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.688154', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.15763390064239502, 'timestamp': '2025-09-30 22:14:52.696931', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:52.759016', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.20236936211585999, 'timestamp': '2025-09-30 22:14:52.770409', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:52.835774', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.17313258349895477, 'timestamp': '2025-09-30 22:14:52.845095', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:52.908811', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.2433258444070816, 'timestamp': '2025-09-30 22:14:52.911969', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:52.973947', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.21277755498886108, 'timestamp': '2025-09-30 22:14:52.982328', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:53.049794', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.22599150240421295, 'timestamp': '2025-09-30 22:14:53.060781', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:53.123903', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.20227980613708496, 'timestamp': '2025-09-30 22:14:53.136755', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:53.203986', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.1373789757490158, 'timestamp': '2025-09-30 22:14:53.217015', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:53.277263', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.21106544137001038, 'timestamp': '2025-09-30 22:14:53.285432', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:53.345021', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.11011496186256409, 'timestamp': '2025-09-30 22:14:53.348490', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:53.411039', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.29239192605018616, 'timestamp': '2025-09-30 22:14:53.415240', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:53.479107', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.15007811784744263, 'timestamp': '2025-09-30 22:14:53.483135', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:53.544076', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.16131144762039185, 'timestamp': '2025-09-30 22:14:53.551722', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:53.609419', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.18498414754867554, 'timestamp': '2025-09-30 22:14:53.614029', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:53.671635', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.21827641129493713, 'timestamp': '2025-09-30 22:14:53.676139', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:53.740929', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.09987182915210724, 'timestamp': '2025-09-30 22:14:53.745353', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:53.803913', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.1986822932958603, 'timestamp': '2025-09-30 22:14:53.811698', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:53.867323', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.2721123695373535, 'timestamp': '2025-09-30 22:14:53.878563', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:53.936274', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.09821350872516632, 'timestamp': '2025-09-30 22:14:53.939426', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:53.998056', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.1571110635995865, 'timestamp': '2025-09-30 22:14:54.001651', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:54.059330', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.12520402669906616, 'timestamp': '2025-09-30 22:14:54.066659', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:54.129267', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2509673237800598, 'timestamp': '2025-09-30 22:14:54.133286', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:54.190900', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.14196684956550598, 'timestamp': '2025-09-30 22:14:54.193728', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:54.260813', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.2327285259962082, 'timestamp': '2025-09-30 22:14:54.264510', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:54.323803', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.17854852974414825, 'timestamp': '2025-09-30 22:14:54.337804', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:54.397543', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.21597246825695038, 'timestamp': '2025-09-30 22:14:54.403886', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:54.469291', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.24935382604599, 'timestamp': '2025-09-30 22:14:54.471974', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:54.533695', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.12285144627094269, 'timestamp': '2025-09-30 22:14:54.542165', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:54.607179', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.2714851200580597, 'timestamp': '2025-09-30 22:14:54.615595', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:54.676252', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.19220754504203796, 'timestamp': '2025-09-30 22:14:54.683954', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:54.742503', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.1670784205198288, 'timestamp': '2025-09-30 22:14:54.746170', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:54.803085', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.21276865899562836, 'timestamp': '2025-09-30 22:14:54.816237', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:54.881819', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.16201123595237732, 'timestamp': '2025-09-30 22:14:54.895911', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:54.955119', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.13308928906917572, 'timestamp': '2025-09-30 22:14:54.963904', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.019759', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.3088456690311432, 'timestamp': '2025-09-30 22:14:55.023221', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.079762', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.260591596364975, 'timestamp': '2025-09-30 22:14:55.086733', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:55.144824', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.14914414286613464, 'timestamp': '2025-09-30 22:14:55.152040', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.217036', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.1442883461713791, 'timestamp': '2025-09-30 22:14:55.220066', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:55.283059', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.19399085640907288, 'timestamp': '2025-09-30 22:14:55.288527', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.353532', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.12427385151386261, 'timestamp': '2025-09-30 22:14:55.357978', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:55.422625', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.2247624397277832, 'timestamp': '2025-09-30 22:14:55.429620', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-30 22:14:55.851826', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.911653', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.18330425024032593, 'timestamp': '2025-09-30 22:14:55.919397', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:55.979014', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.21333818137645721, 'timestamp': '2025-09-30 22:14:55.984107', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.043114', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.10766611993312836, 'timestamp': '2025-09-30 22:14:56.049124', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:56.118431', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.19033347070217133, 'timestamp': '2025-09-30 22:14:56.125369', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:56.182847', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.22226528823375702, 'timestamp': '2025-09-30 22:14:56.189453', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.247668', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.15904271602630615, 'timestamp': '2025-09-30 22:14:56.251871', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:56.309972', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.1581721305847168, 'timestamp': '2025-09-30 22:14:56.314474', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.372611', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.26380449533462524, 'timestamp': '2025-09-30 22:14:56.380305', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:56.436512', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.12928521633148193, 'timestamp': '2025-09-30 22:14:56.441102', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:56.498216', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.1568068563938141, 'timestamp': '2025-09-30 22:14:56.501193', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.559220', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.18667864799499512, 'timestamp': '2025-09-30 22:14:56.562197', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:56.627282', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.17838136851787567, 'timestamp': '2025-09-30 22:14:56.635148', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:56.700240', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.30701687932014465, 'timestamp': '2025-09-30 22:14:56.703468', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:56.759836', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.17784079909324646, 'timestamp': '2025-09-30 22:14:56.765671', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.823957', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.19524556398391724, 'timestamp': '2025-09-30 22:14:56.828969', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:56.894591', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20829027891159058, 'timestamp': '2025-09-30 22:14:56.903023', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:56.962186', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.21357779204845428, 'timestamp': '2025-09-30 22:14:56.967279', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:57.038956', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.10805096477270126, 'timestamp': '2025-09-30 22:14:57.042497', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:57.101786', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.1598586142063141, 'timestamp': '2025-09-30 22:14:57.105980', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:57.165009', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.15937240421772003, 'timestamp': '2025-09-30 22:14:57.172726', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:57.230588', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.29501083493232727, 'timestamp': '2025-09-30 22:14:57.236292', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:57.297701', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.10179055482149124, 'timestamp': '2025-09-30 22:14:57.300822', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:57.359407', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.13259300589561462, 'timestamp': '2025-09-30 22:14:57.364435', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:57.422261', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.16790765523910522, 'timestamp': '2025-09-30 22:14:57.441722', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:57.516804', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.24751080572605133, 'timestamp': '2025-09-30 22:14:57.520735', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:57.580702', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.1278124749660492, 'timestamp': '2025-09-30 22:14:57.585396', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:57.644338', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.2517399191856384, 'timestamp': '2025-09-30 22:14:57.648376', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:57.716191', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.2565087080001831, 'timestamp': '2025-09-30 22:14:57.734604', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:57.810846', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.19391675293445587, 'timestamp': '2025-09-30 22:14:57.814578', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:57.878576', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.2195635586977005, 'timestamp': '2025-09-30 22:14:57.885987', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:57.951334', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.20444172620773315, 'timestamp': '2025-09-30 22:14:57.967570', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.042119', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.17707015573978424, 'timestamp': '2025-09-30 22:14:58.050933', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.110442', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.17266562581062317, 'timestamp': '2025-09-30 22:14:58.113744', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.171171', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.1510709822177887, 'timestamp': '2025-09-30 22:14:58.174914', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.240222', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.1752585470676422, 'timestamp': '2025-09-30 22:14:58.243937', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.306239', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.21589705348014832, 'timestamp': '2025-09-30 22:14:58.321801', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.381058', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.09523708373308182, 'timestamp': '2025-09-30 22:14:58.384735', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.450265', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.12096506357192993, 'timestamp': '2025-09-30 22:14:58.462132', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.534723', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.2086295783519745, 'timestamp': '2025-09-30 22:14:58.538529', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.608779', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.1991664469242096, 'timestamp': '2025-09-30 22:14:58.615401', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:58.676197', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.2160647213459015, 'timestamp': '2025-09-30 22:14:58.680061', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:58.737850', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.11298350989818573, 'timestamp': '2025-09-30 22:14:58.749956', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.807322', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.10835447907447815, 'timestamp': '2025-09-30 22:14:58.816449', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.876110', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.21420679986476898, 'timestamp': '2025-09-30 22:14:58.883930', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:58.941934', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.15944193303585052, 'timestamp': '2025-09-30 22:14:58.947610', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:59.010658', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.1689467430114746, 'timestamp': '2025-09-30 22:14:59.023709', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:59.091770', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.198667511343956, 'timestamp': '2025-09-30 22:14:59.096334', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:59.166579', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.2605125904083252, 'timestamp': '2025-09-30 22:14:59.174374', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:59.231858', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.2330031394958496, 'timestamp': '2025-09-30 22:14:59.235619', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:14:59.300034', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.13387064635753632, 'timestamp': '2025-09-30 22:14:59.303769', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:59.365533', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.221832737326622, 'timestamp': '2025-09-30 22:14:59.379274', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:59.451249', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.1142079159617424, 'timestamp': '2025-09-30 22:14:59.460310', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:59.518220', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.2391206920146942, 'timestamp': '2025-09-30 22:14:59.528181', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:14:59.591597', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.1742703914642334, 'timestamp': '2025-09-30 22:14:59.595538', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:59.654333', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.2167002558708191, 'timestamp': '2025-09-30 22:14:59.668006', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:14:59.733733', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.1580507904291153, 'timestamp': '2025-09-30 22:14:59.740800', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:14:59.798643', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.13722878694534302, 'timestamp': '2025-09-30 22:14:59.804042', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:14:59.868933', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.1441235989332199, 'timestamp': '2025-09-30 22:14:59.884739', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:14:59.945867', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.10444111377000809, 'timestamp': '2025-09-30 22:14:59.949890', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:00.017783', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.20000381767749786, 'timestamp': '2025-09-30 22:15:00.027077', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:00.095846', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.13886095583438873, 'timestamp': '2025-09-30 22:15:00.105568', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:00.172061', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.10301496088504791, 'timestamp': '2025-09-30 22:15:00.175220', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:00.233565', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.08804189413785934, 'timestamp': '2025-09-30 22:15:00.237785', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:00.300514', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.10387322306632996, 'timestamp': '2025-09-30 22:15:00.308295', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:00.377926', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.17266467213630676, 'timestamp': '2025-09-30 22:15:00.383741', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:00.450776', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.15966589748859406, 'timestamp': '2025-09-30 22:15:00.455396', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:00.526820', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.14832298457622528, 'timestamp': '2025-09-30 22:15:00.532167', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:00.593279', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.16251219809055328, 'timestamp': '2025-09-30 22:15:00.600860', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:00.673214', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.1382535994052887, 'timestamp': '2025-09-30 22:15:00.677410', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:00.756528', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.17387069761753082, 'timestamp': '2025-09-30 22:15:00.762844', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:00.831934', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.2291257232427597, 'timestamp': '2025-09-30 22:15:00.835880', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:00.896446', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.13057926297187805, 'timestamp': '2025-09-30 22:15:00.904955', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:00.970081', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.11291274428367615, 'timestamp': '2025-09-30 22:15:00.974655', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:01.033285', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.14636556804180145, 'timestamp': '2025-09-30 22:15:01.038780', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:01.100067', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.16390246152877808, 'timestamp': '2025-09-30 22:15:01.106195', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:01.166785', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.09097833931446075, 'timestamp': '2025-09-30 22:15:01.174584', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:01.245363', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.11077236384153366, 'timestamp': '2025-09-30 22:15:01.250107', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:01.308311', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.26338785886764526, 'timestamp': '2025-09-30 22:15:01.312339', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:01.373254', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.2003774642944336, 'timestamp': '2025-09-30 22:15:01.388733', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:01.448081', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.10410445928573608, 'timestamp': '2025-09-30 22:15:01.455407', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:01.512831', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.1712438017129898, 'timestamp': '2025-09-30 22:15:01.517623', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:01.584946', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.2045280784368515, 'timestamp': '2025-09-30 22:15:01.587977', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:01.648951', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.16529704630374908, 'timestamp': '2025-09-30 22:15:01.654101', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:01.725544', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.19177888333797455, 'timestamp': '2025-09-30 22:15:01.733614', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:01.796034', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.15584532916545868, 'timestamp': '2025-09-30 22:15:01.799843', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:01.858920', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.14739751815795898, 'timestamp': '2025-09-30 22:15:01.864136', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:01.922332', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.19982501864433289, 'timestamp': '2025-09-30 22:15:01.927209', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:01.986412', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.17837761342525482, 'timestamp': '2025-09-30 22:15:01.998724', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.058542', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.18556620180606842, 'timestamp': '2025-09-30 22:15:02.072758', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:02.133743', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.10886509716510773, 'timestamp': '2025-09-30 22:15:02.142189', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:02.208701', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.1983409821987152, 'timestamp': '2025-09-30 22:15:02.214203', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.272948', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.1846836656332016, 'timestamp': '2025-09-30 22:15:02.280750', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:02.337651', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.16577300429344177, 'timestamp': '2025-09-30 22:15:02.341906', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.404418', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.165360689163208, 'timestamp': '2025-09-30 22:15:02.409587', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.474549', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06065543740987778, 'timestamp': '2025-09-30 22:15:02.480893', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.549008', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.295931339263916, 'timestamp': '2025-09-30 22:15:02.556882', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.614519', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.0884283185005188, 'timestamp': '2025-09-30 22:15:02.620648', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:02.686330', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.10425424575805664, 'timestamp': '2025-09-30 22:15:02.691377', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.756771', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.16403496265411377, 'timestamp': '2025-09-30 22:15:02.767743', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:02.836404', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.21404117345809937, 'timestamp': '2025-09-30 22:15:02.843812', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:02.900924', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.1335994452238083, 'timestamp': '2025-09-30 22:15:02.905109', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:02.965073', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.2052960991859436, 'timestamp': '2025-09-30 22:15:02.971633', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:03.031721', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.15863493084907532, 'timestamp': '2025-09-30 22:15:03.036333', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:03.101080', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.15009170770645142, 'timestamp': '2025-09-30 22:15:03.111806', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:03.170688', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.1772615909576416, 'timestamp': '2025-09-30 22:15:03.175086', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:03.245501', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.2442709356546402, 'timestamp': '2025-09-30 22:15:03.249638', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:03.312782', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.23488134145736694, 'timestamp': '2025-09-30 22:15:03.317634', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:03.382218', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.10329725593328476, 'timestamp': '2025-09-30 22:15:03.390678', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:03.449820', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.23185066878795624, 'timestamp': '2025-09-30 22:15:03.458944', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:03.519309', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.20026925206184387, 'timestamp': '2025-09-30 22:15:03.522979', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:03.584991', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.13584880530834198, 'timestamp': '2025-09-30 22:15:03.589022', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:03.655758', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.1468515396118164, 'timestamp': '2025-09-30 22:15:03.662359', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:03.726345', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.17314331233501434, 'timestamp': '2025-09-30 22:15:03.730283', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:03.787220', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.1140916720032692, 'timestamp': '2025-09-30 22:15:03.791546', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:03.849138', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.1287711262702942, 'timestamp': '2025-09-30 22:15:03.853399', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:03.925802', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.2960272431373596, 'timestamp': '2025-09-30 22:15:03.933475', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:04.000103', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.30081814527511597, 'timestamp': '2025-09-30 22:15:04.005847', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:04.070232', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.146279975771904, 'timestamp': '2025-09-30 22:15:04.074337', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:04.132147', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.18171848356723785, 'timestamp': '2025-09-30 22:15:04.136054', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:04.196435', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.21145743131637573, 'timestamp': '2025-09-30 22:15:04.204521', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:04.273324', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.13964061439037323, 'timestamp': '2025-09-30 22:15:04.278453', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:04.337903', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.10770484805107117, 'timestamp': '2025-09-30 22:15:04.350599', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:04.420339', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.15074875950813293, 'timestamp': '2025-09-30 22:15:04.425191', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:04.483047', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.2452060431241989, 'timestamp': '2025-09-30 22:15:04.490719', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:04.548405', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.12032283842563629, 'timestamp': '2025-09-30 22:15:04.563012', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:04.633521', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.13043808937072754, 'timestamp': '2025-09-30 22:15:04.637929', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:04.697311', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.18437997996807098, 'timestamp': '2025-09-30 22:15:04.711418', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:04.781453', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.15835684537887573, 'timestamp': '2025-09-30 22:15:04.790343', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:04.851415', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.18530786037445068, 'timestamp': '2025-09-30 22:15:04.856032', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:04.914296', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.09488809108734131, 'timestamp': '2025-09-30 22:15:04.929062', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:04.993499', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.22282566130161285, 'timestamp': '2025-09-30 22:15:05.001312', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:05.061975', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.15209023654460907, 'timestamp': '2025-09-30 22:15:05.071842', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:05.130142', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.2601701319217682, 'timestamp': '2025-09-30 22:15:05.134262', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:05.192351', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.15585435926914215, 'timestamp': '2025-09-30 22:15:05.196363', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:05.254721', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.1952660083770752, 'timestamp': '2025-09-30 22:15:05.261177', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:05.330848', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.20595836639404297, 'timestamp': '2025-09-30 22:15:05.342188', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:05.400918', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.13235822319984436, 'timestamp': '2025-09-30 22:15:05.405661', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:05.464129', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.15370583534240723, 'timestamp': '2025-09-30 22:15:05.472645', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:05.529660', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.1522640734910965, 'timestamp': '2025-09-30 22:15:05.534622', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:05.591962', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.11329476535320282, 'timestamp': '2025-09-30 22:15:05.600478', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:05.663324', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.27778077125549316, 'timestamp': '2025-09-30 22:15:05.674026', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:05.734053', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.206989586353302, 'timestamp': '2025-09-30 22:15:05.739978', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:05.799831', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.13140852749347687, 'timestamp': '2025-09-30 22:15:05.804005', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:05.862762', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.09370981156826019, 'timestamp': '2025-09-30 22:15:05.870371', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:05.949026', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.14328159391880035, 'timestamp': '2025-09-30 22:15:05.964570', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:06.025290', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.1349937617778778, 'timestamp': '2025-09-30 22:15:06.032070', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:06.102406', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.13242661952972412, 'timestamp': '2025-09-30 22:15:06.107869', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:06.176865', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.16005507111549377, 'timestamp': '2025-09-30 22:15:06.184291', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.242721', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.11839720606803894, 'timestamp': '2025-09-30 22:15:06.248208', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.305774', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.2204393446445465, 'timestamp': '2025-09-30 22:15:06.310626', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.368104', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.23521730303764343, 'timestamp': '2025-09-30 22:15:06.383995', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:06.442190', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21102815866470337, 'timestamp': '2025-09-30 22:15:06.450385', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:06.508801', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.14190855622291565, 'timestamp': '2025-09-30 22:15:06.513899', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.574142', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.0946912094950676, 'timestamp': '2025-09-30 22:15:06.589797', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:06.650876', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.2724651098251343, 'timestamp': '2025-09-30 22:15:06.657476', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.730295', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.14916783571243286, 'timestamp': '2025-09-30 22:15:06.738620', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:06.796386', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.12318522483110428, 'timestamp': '2025-09-30 22:15:06.802442', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:06.861422', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.146216481924057, 'timestamp': '2025-09-30 22:15:06.865729', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:06.924847', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.1799067109823227, 'timestamp': '2025-09-30 22:15:06.934888', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:06.992457', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.19060422480106354, 'timestamp': '2025-09-30 22:15:07.000577', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:07.059716', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.276182621717453, 'timestamp': '2025-09-30 22:15:07.066930', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:07.127287', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.18221130967140198, 'timestamp': '2025-09-30 22:15:07.131420', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:07.190128', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.14768467843532562, 'timestamp': '2025-09-30 22:15:07.195872', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:07.254289', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.15823876857757568, 'timestamp': '2025-09-30 22:15:07.271873', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:07.329602', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.19436591863632202, 'timestamp': '2025-09-30 22:15:07.343603', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:07.402444', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.12489598244428635, 'timestamp': '2025-09-30 22:15:07.408071', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:07.466955', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.18820522725582123, 'timestamp': '2025-09-30 22:15:07.471836', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:07.529858', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.19998390972614288, 'timestamp': '2025-09-30 22:15:07.542974', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:07.603655', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.2195727676153183, 'timestamp': '2025-09-30 22:15:07.609939', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:07.680825', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.12729841470718384, 'timestamp': '2025-09-30 22:15:07.685393', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:07.756598', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.19149738550186157, 'timestamp': '2025-09-30 22:15:07.772591', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:07.835096', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.12590759992599487, 'timestamp': '2025-09-30 22:15:07.844245', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:07.903589', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.1455051749944687, 'timestamp': '2025-09-30 22:15:07.917106', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:07.975941', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12720133364200592, 'timestamp': '2025-09-30 22:15:07.983921', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:08.043772', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.20601952075958252, 'timestamp': '2025-09-30 22:15:08.048253', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:15:08.118395', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.11925432085990906, 'timestamp': '2025-09-30 22:15:08.127568', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:08.186224', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.15528933703899384, 'timestamp': '2025-09-30 22:15:08.190766', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:08.251148', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.11097989976406097, 'timestamp': '2025-09-30 22:15:08.255182', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:08.326289', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.1828681230545044, 'timestamp': '2025-09-30 22:15:08.330049', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:08.390860', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.18173715472221375, 'timestamp': '2025-09-30 22:15:08.408475', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:08.479053', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.10349886864423752, 'timestamp': '2025-09-30 22:15:08.483749', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:08.551764', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.1817288100719452, 'timestamp': '2025-09-30 22:15:08.555648', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:08.613575', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.1783367544412613, 'timestamp': '2025-09-30 22:15:08.617801', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:08.680391', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.20348656177520752, 'timestamp': '2025-09-30 22:15:08.689101', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:08.757408', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.13146555423736572, 'timestamp': '2025-09-30 22:15:08.762631', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:08.820655', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.1822759360074997, 'timestamp': '2025-09-30 22:15:08.826468', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:08.885685', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.15110670030117035, 'timestamp': '2025-09-30 22:15:08.889086', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:08.956863', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.12869684398174286, 'timestamp': '2025-09-30 22:15:08.966448', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:09.042177', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.15116523206233978, 'timestamp': '2025-09-30 22:15:09.048372', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:09.117044', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.16520652174949646, 'timestamp': '2025-09-30 22:15:09.121862', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:09.180158', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.18348059058189392, 'timestamp': '2025-09-30 22:15:09.185393', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:09.243152', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.21598432958126068, 'timestamp': '2025-09-30 22:15:09.252076', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:09.319106', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.21082021296024323, 'timestamp': '2025-09-30 22:15:09.323451', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:09.381910', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.1665232628583908, 'timestamp': '2025-09-30 22:15:09.385800', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:09.453839', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.14329583942890167, 'timestamp': '2025-09-30 22:15:09.457440', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:09.516764', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.13652122020721436, 'timestamp': '2025-09-30 22:15:09.524286', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:09.581135', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.11901744455099106, 'timestamp': '2025-09-30 22:15:09.590275', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:09.658357', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.16903601586818695, 'timestamp': '2025-09-30 22:15:09.663917', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:09.722424', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.20093399286270142, 'timestamp': '2025-09-30 22:15:09.727490', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:09.793396', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.1959141343832016, 'timestamp': '2025-09-30 22:15:09.802949', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:09.870854', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.11809537559747696, 'timestamp': '2025-09-30 22:15:09.877153', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:09.943499', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.1855251044034958, 'timestamp': '2025-09-30 22:15:09.947747', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:10.006371', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.273237943649292, 'timestamp': '2025-09-30 22:15:10.010968', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:10.069324', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.16286957263946533, 'timestamp': '2025-09-30 22:15:10.087316', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:10.146271', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.21300388872623444, 'timestamp': '2025-09-30 22:15:10.153880', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:10.226333', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.16462944447994232, 'timestamp': '2025-09-30 22:15:10.230365', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:10.289641', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.25959229469299316, 'timestamp': '2025-09-30 22:15:10.299181', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:10.368354', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.19986934959888458, 'timestamp': '2025-09-30 22:15:10.386713', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:10.446735', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.21521031856536865, 'timestamp': '2025-09-30 22:15:10.450478', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:10.517555', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.2342575192451477, 'timestamp': '2025-09-30 22:15:10.523222', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:10.581311', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.17221032083034515, 'timestamp': '2025-09-30 22:15:10.585686', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:10.643291', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.21903252601623535, 'timestamp': '2025-09-30 22:15:10.661186', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:10.731944', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.22327251732349396, 'timestamp': '2025-09-30 22:15:10.744444', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:10.806804', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.14727133512496948, 'timestamp': '2025-09-30 22:15:10.820731', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:10.878127', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.18682286143302917, 'timestamp': '2025-09-30 22:15:10.884931', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:10.943045', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.12073269486427307, 'timestamp': '2025-09-30 22:15:10.950974', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.011652', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.13629817962646484, 'timestamp': '2025-09-30 22:15:11.016749', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:11.078247', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.2036096453666687, 'timestamp': '2025-09-30 22:15:11.083709', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:11.152862', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.13381549715995789, 'timestamp': '2025-09-30 22:15:11.156996', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.214778', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.2933345437049866, 'timestamp': '2025-09-30 22:15:11.222744', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:11.279792', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.22702106833457947, 'timestamp': '2025-09-30 22:15:11.293876', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.352269', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.160512775182724, 'timestamp': '2025-09-30 22:15:11.361748', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:11.420377', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.19595295190811157, 'timestamp': '2025-09-30 22:15:11.423298', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:11.484213', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.19386504590511322, 'timestamp': '2025-09-30 22:15:11.493251', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:11.552092', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.22318080067634583, 'timestamp': '2025-09-30 22:15:11.557733', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.615664', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.12289418280124664, 'timestamp': '2025-09-30 22:15:11.620003', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:11.688116', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.180960550904274, 'timestamp': '2025-09-30 22:15:11.693739', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.755519', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.24936415255069733, 'timestamp': '2025-09-30 22:15:11.771913', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:11.829091', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.1333617866039276, 'timestamp': '2025-09-30 22:15:11.844964', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.904170', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.14799952507019043, 'timestamp': '2025-09-30 22:15:11.911750', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:11.969958', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.24411122500896454, 'timestamp': '2025-09-30 22:15:11.978947', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:12.036962', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.16879329085350037, 'timestamp': '2025-09-30 22:15:12.054106', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:12.122320', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.15678371489048004, 'timestamp': '2025-09-30 22:15:12.136186', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.206425', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.19431684911251068, 'timestamp': '2025-09-30 22:15:12.210887', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:12.270080', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.20141194760799408, 'timestamp': '2025-09-30 22:15:12.274558', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.339268', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.1884860247373581, 'timestamp': '2025-09-30 22:15:12.347894', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:12.410203', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.15626713633537292, 'timestamp': '2025-09-30 22:15:12.414627', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.477754', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.20615501701831818, 'timestamp': '2025-09-30 22:15:12.481028', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.550723', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.12801313400268555, 'timestamp': '2025-09-30 22:15:12.559166', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:12.616580', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.18822287023067474, 'timestamp': '2025-09-30 22:15:12.635164', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:12.693313', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.17386434972286224, 'timestamp': '2025-09-30 22:15:12.696919', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.755054', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.11696849763393402, 'timestamp': '2025-09-30 22:15:12.765767', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:12.836696', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13744531571865082, 'timestamp': '2025-09-30 22:15:12.850122', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:12.910640', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.1641198843717575, 'timestamp': '2025-09-30 22:15:12.918413', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:12.975978', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.24276603758335114, 'timestamp': '2025-09-30 22:15:12.987285', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:13.053118', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14129626750946045, 'timestamp': '2025-09-30 22:15:13.057159', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:13.116121', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.15276029706001282, 'timestamp': '2025-09-30 22:15:13.121847', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:13.198374', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.15716883540153503, 'timestamp': '2025-09-30 22:15:13.207946', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:15:13.274550', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.20303305983543396, 'timestamp': '2025-09-30 22:15:13.278978', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:13.339044', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.11672617495059967, 'timestamp': '2025-09-30 22:15:13.343491', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:13.405851', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.2369336634874344, 'timestamp': '2025-09-30 22:15:13.410448', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:13.471248', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.11284256726503372, 'timestamp': '2025-09-30 22:15:13.478458', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:13.544107', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.2252192497253418, 'timestamp': '2025-09-30 22:15:13.554371', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:13.613212', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.26361021399497986, 'timestamp': '2025-09-30 22:15:13.618431', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:13.685474', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.2167544960975647, 'timestamp': '2025-09-30 22:15:13.689092', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:13.747204', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.12586209177970886, 'timestamp': '2025-09-30 22:15:13.754596', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:13.811275', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.1726198047399521, 'timestamp': '2025-09-30 22:15:13.815473', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:13.874158', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.19941984117031097, 'timestamp': '2025-09-30 22:15:13.888102', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:13.945960', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.1532278209924698, 'timestamp': '2025-09-30 22:15:13.949998', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:14.020806', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.10210549831390381, 'timestamp': '2025-09-30 22:15:14.028447', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:14.085547', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.14266154170036316, 'timestamp': '2025-09-30 22:15:14.089685', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.154828', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.15908536314964294, 'timestamp': '2025-09-30 22:15:14.159969', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:14.226567', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.18019035458564758, 'timestamp': '2025-09-30 22:15:14.231468', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:14.289945', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.18991059064865112, 'timestamp': '2025-09-30 22:15:14.298377', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.362458', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.1641605645418167, 'timestamp': '2025-09-30 22:15:14.366585', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:14.425073', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.20319882035255432, 'timestamp': '2025-09-30 22:15:14.429648', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:14.487604', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.19022426009178162, 'timestamp': '2025-09-30 22:15:14.491547', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.558376', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.18306440114974976, 'timestamp': '2025-09-30 22:15:14.566706', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.623546', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.14857429265975952, 'timestamp': '2025-09-30 22:15:14.626479', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:14.684751', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.2022228091955185, 'timestamp': '2025-09-30 22:15:14.699245', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.756754', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.11917553842067719, 'timestamp': '2025-09-30 22:15:14.760778', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:14.819026', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.26533421874046326, 'timestamp': '2025-09-30 22:15:14.825839', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:14.882610', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.07963758707046509, 'timestamp': '2025-09-30 22:15:14.887038', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:14.958900', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.22437721490859985, 'timestamp': '2025-09-30 22:15:14.961963', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:15.022744', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.18339978158473969, 'timestamp': '2025-09-30 22:15:15.026529', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:15.085441', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.18118105828762054, 'timestamp': '2025-09-30 22:15:15.095902', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:15.155518', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.14946359395980835, 'timestamp': '2025-09-30 22:15:15.159607', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.219045', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.1380072832107544, 'timestamp': '2025-09-30 22:15:15.223294', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.282740', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.11374028772115707, 'timestamp': '2025-09-30 22:15:15.297395', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.355442', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.25401967763900757, 'timestamp': '2025-09-30 22:15:15.362471', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:15.452174', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.1607445478439331, 'timestamp': '2025-09-30 22:15:15.458957', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:15.516595', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.14936494827270508, 'timestamp': '2025-09-30 22:15:15.527182', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:15.586339', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.21505169570446014, 'timestamp': '2025-09-30 22:15:15.591769', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:15.650287', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.229803666472435, 'timestamp': '2025-09-30 22:15:15.658055', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.715342', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.09091982990503311, 'timestamp': '2025-09-30 22:15:15.719350', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.778576', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.2608705461025238, 'timestamp': '2025-09-30 22:15:15.781974', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.840799', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.18939009308815002, 'timestamp': '2025-09-30 22:15:15.844687', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:15.903254', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.18309229612350464, 'timestamp': '2025-09-30 22:15:15.910766', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:15.983878', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.13415203988552094, 'timestamp': '2025-09-30 22:15:15.987155', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:16.059166', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.21407847106456757, 'timestamp': '2025-09-30 22:15:16.065103', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:16.131755', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.14203135669231415, 'timestamp': '2025-09-30 22:15:16.135545', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:16.195062', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.3595644235610962, 'timestamp': '2025-09-30 22:15:16.201902', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:16.268147', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.14390072226524353, 'timestamp': '2025-09-30 22:15:16.272988', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:16.330542', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.1781197041273117, 'timestamp': '2025-09-30 22:15:16.334784', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:16.393292', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.1742846816778183, 'timestamp': '2025-09-30 22:15:16.403333', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:16.471716', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.14616979658603668, 'timestamp': '2025-09-30 22:15:16.486922', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:15:29.988976', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 11172.098746982676, 'timestamp': '2025-09-30 22:15:29.996209', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:30.053949', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.2742616832256317, 'timestamp': '2025-09-30 22:15:30.059504', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:30.120956', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.20970241725444794, 'timestamp': '2025-09-30 22:15:30.124209', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:30.191539', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.10791172087192535, 'timestamp': '2025-09-30 22:15:30.196570', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:30.255537', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.15677712857723236, 'timestamp': '2025-09-30 22:15:30.265997', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:30.326503', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.1171717420220375, 'timestamp': '2025-09-30 22:15:30.331505', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:30.391059', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.14716574549674988, 'timestamp': '2025-09-30 22:15:30.406549', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:30.465277', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.12466941028833389, 'timestamp': '2025-09-30 22:15:30.469348', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:30.527653', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.18236352503299713, 'timestamp': '2025-09-30 22:15:30.535506', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:30.604430', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.17487679421901703, 'timestamp': '2025-09-30 22:15:30.618739', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:30.689975', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.1217539831995964, 'timestamp': '2025-09-30 22:15:30.694323', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:30.753896', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.22714947164058685, 'timestamp': '2025-09-30 22:15:30.758126', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:30.817536', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.21397462487220764, 'timestamp': '2025-09-30 22:15:30.827078', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:30.889227', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.4312174916267395, 'timestamp': '2025-09-30 22:15:30.892823', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:30.967895', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.16413047909736633, 'timestamp': '2025-09-30 22:15:30.972911', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.031255', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.14406612515449524, 'timestamp': '2025-09-30 22:15:31.035574', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:31.094186', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.11840084940195084, 'timestamp': '2025-09-30 22:15:31.103387', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:31.165060', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.18980343639850616, 'timestamp': '2025-09-30 22:15:31.176517', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.235753', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.15077567100524902, 'timestamp': '2025-09-30 22:15:31.250547', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.320890', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.19883546233177185, 'timestamp': '2025-09-30 22:15:31.326016', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:31.386135', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.1950901448726654, 'timestamp': '2025-09-30 22:15:31.395550', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:31.463293', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.13779476284980774, 'timestamp': '2025-09-30 22:15:31.466765', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.524535', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.13903547823429108, 'timestamp': '2025-09-30 22:15:31.530462', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:31.588420', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.2952933609485626, 'timestamp': '2025-09-30 22:15:31.602966', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:31.660045', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.19614872336387634, 'timestamp': '2025-09-30 22:15:31.668380', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.729765', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.17233318090438843, 'timestamp': '2025-09-30 22:15:31.734170', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.793413', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.25368356704711914, 'timestamp': '2025-09-30 22:15:31.799342', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:31.866969', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.19360800087451935, 'timestamp': '2025-09-30 22:15:31.879678', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:31.937049', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.08405505120754242, 'timestamp': '2025-09-30 22:15:31.944677', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:32.010884', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.14375843107700348, 'timestamp': '2025-09-30 22:15:32.015882', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:32.088507', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.17937448620796204, 'timestamp': '2025-09-30 22:15:32.094290', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.153593', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.14837677776813507, 'timestamp': '2025-09-30 22:15:32.168259', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:32.234540', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.08890117704868317, 'timestamp': '2025-09-30 22:15:32.243212', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.302468', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.10489144176244736, 'timestamp': '2025-09-30 22:15:32.307704', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.365413', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.29134342074394226, 'timestamp': '2025-09-30 22:15:32.370287', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.428118', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.2080826312303543, 'timestamp': '2025-09-30 22:15:32.433762', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.491101', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.17155835032463074, 'timestamp': '2025-09-30 22:15:32.499503', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.564849', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.10871434211730957, 'timestamp': '2025-09-30 22:15:32.578735', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:32.637953', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.13375890254974365, 'timestamp': '2025-09-30 22:15:32.641654', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:32.700226', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.13929501175880432, 'timestamp': '2025-09-30 22:15:32.704768', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:32.765683', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.19196008145809174, 'timestamp': '2025-09-30 22:15:32.783358', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:32.840195', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.2198355495929718, 'timestamp': '2025-09-30 22:15:32.856311', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:32.913221', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.1978638619184494, 'timestamp': '2025-09-30 22:15:32.928302', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:32.988939', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.160124272108078, 'timestamp': '2025-09-30 22:15:33.004184', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:33.064430', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.18058130145072937, 'timestamp': '2025-09-30 22:15:33.081166', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:33.159570', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.29740169644355774, 'timestamp': '2025-09-30 22:15:33.174992', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:33.232472', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.15052209794521332, 'timestamp': '2025-09-30 22:15:33.237911', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:33.308359', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.1412227302789688, 'timestamp': '2025-09-30 22:15:33.321422', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:33.381096', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.1565481573343277, 'timestamp': '2025-09-30 22:15:33.397851', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.454749', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.2102099061012268, 'timestamp': '2025-09-30 22:15:33.459680', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.519325', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.16353413462638855, 'timestamp': '2025-09-30 22:15:33.524629', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.585928', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.17337268590927124, 'timestamp': '2025-09-30 22:15:33.599979', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:33.658103', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.13674582540988922, 'timestamp': '2025-09-30 22:15:33.666353', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.724251', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.12179689854383469, 'timestamp': '2025-09-30 22:15:33.728007', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.787618', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.1254466474056244, 'timestamp': '2025-09-30 22:15:33.790916', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:33.851476', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.14431867003440857, 'timestamp': '2025-09-30 22:15:33.856324', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:33.916269', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.14296798408031464, 'timestamp': '2025-09-30 22:15:33.924307', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:33.981044', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.14646434783935547, 'timestamp': '2025-09-30 22:15:33.985316', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:34.045229', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.1783745437860489, 'timestamp': '2025-09-30 22:15:34.056739', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:34.115968', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.22532160580158234, 'timestamp': '2025-09-30 22:15:34.119196', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:34.177623', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.1685783714056015, 'timestamp': '2025-09-30 22:15:34.185731', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:34.241537', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.22098302841186523, 'timestamp': '2025-09-30 22:15:34.247222', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:34.306648', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.1445629894733429, 'timestamp': '2025-09-30 22:15:34.321951', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:34.393200', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.2943008840084076, 'timestamp': '2025-09-30 22:15:34.396543', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:34.465167', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.20437286794185638, 'timestamp': '2025-09-30 22:15:34.473969', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:34.530990', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.1764429360628128, 'timestamp': '2025-09-30 22:15:34.534792', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:34.604951', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.194721058011055, 'timestamp': '2025-09-30 22:15:34.609577', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:34.694541', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.07493942230939865, 'timestamp': '2025-09-30 22:15:34.697950', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:34.767505', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.12321407347917557, 'timestamp': '2025-09-30 22:15:34.782780', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:34.843063', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.22337087988853455, 'timestamp': '2025-09-30 22:15:34.856218', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:34.922912', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.21918272972106934, 'timestamp': '2025-09-30 22:15:34.935869', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:34.999197', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.20103250443935394, 'timestamp': '2025-09-30 22:15:35.012322', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:35.083457', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.1516084223985672, 'timestamp': '2025-09-30 22:15:35.090714', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:35.148898', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.20071181654930115, 'timestamp': '2025-09-30 22:15:35.152732', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:35.219143', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.1318935602903366, 'timestamp': '2025-09-30 22:15:35.224928', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:35.283359', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.1942814141511917, 'timestamp': '2025-09-30 22:15:35.297218', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:35.356184', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.12759782373905182, 'timestamp': '2025-09-30 22:15:35.364668', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:35.423355', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.24283351004123688, 'timestamp': '2025-09-30 22:15:35.428547', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:35.486743', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.09429144859313965, 'timestamp': '2025-09-30 22:15:35.490009', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:35.549054', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.1642613410949707, 'timestamp': '2025-09-30 22:15:35.553686', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:35.616954', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.21073371171951294, 'timestamp': '2025-09-30 22:15:35.623637', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:35.681337', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.23109981417655945, 'timestamp': '2025-09-30 22:15:35.686458', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:35.744812', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.17623628675937653, 'timestamp': '2025-09-30 22:15:35.750984', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:35.808263', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.16594339907169342, 'timestamp': '2025-09-30 22:15:35.812920', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:35.871131', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.155451238155365, 'timestamp': '2025-09-30 22:15:35.879042', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:35.935871', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.22185122966766357, 'timestamp': '2025-09-30 22:15:35.949961', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:36.008918', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.20493364334106445, 'timestamp': '2025-09-30 22:15:36.016158', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:36.073458', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.1533803641796112, 'timestamp': '2025-09-30 22:15:36.076806', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:36.143154', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24690622091293335, 'timestamp': '2025-09-30 22:15:36.149666', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:36.208368', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.12173803150653839, 'timestamp': '2025-09-30 22:15:36.214120', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:36.272064', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.10013598203659058, 'timestamp': '2025-09-30 22:15:36.277899', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:36.344561', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.17180077731609344, 'timestamp': '2025-09-30 22:15:36.351478', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:36.409837', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.12298308312892914, 'timestamp': '2025-09-30 22:15:36.427084', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:36.495935', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.1787554770708084, 'timestamp': '2025-09-30 22:15:36.500639', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:36.558644', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.15046586096286774, 'timestamp': '2025-09-30 22:15:36.561801', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:36.622064', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.11846953630447388, 'timestamp': '2025-09-30 22:15:36.626482', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:36.684963', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.10909664630889893, 'timestamp': '2025-09-30 22:15:36.692948', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:36.751284', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.14736376702785492, 'timestamp': '2025-09-30 22:15:36.757020', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:36.816313', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.16339264810085297, 'timestamp': '2025-09-30 22:15:36.821116', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:36.879765', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.12630456686019897, 'timestamp': '2025-09-30 22:15:36.883748', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:15:36.944358', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.18292976915836334, 'timestamp': '2025-09-30 22:15:36.956731', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.024642', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.1036822497844696, 'timestamp': '2025-09-30 22:15:37.028285', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.087211', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.18512707948684692, 'timestamp': '2025-09-30 22:15:37.092943', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.152158', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.23282966017723083, 'timestamp': '2025-09-30 22:15:37.159192', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:37.229484', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.19972968101501465, 'timestamp': '2025-09-30 22:15:37.236525', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.305736', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.12239427119493484, 'timestamp': '2025-09-30 22:15:37.311150', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.371499', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.157085582613945, 'timestamp': '2025-09-30 22:15:37.375465', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.438435', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.23003451526165009, 'timestamp': '2025-09-30 22:15:37.442282', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:37.512073', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.09576187282800674, 'timestamp': '2025-09-30 22:15:37.519045', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:37.588441', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.19121932983398438, 'timestamp': '2025-09-30 22:15:37.606122', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.665375', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.16956812143325806, 'timestamp': '2025-09-30 22:15:37.670638', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:37.731288', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.12067122012376785, 'timestamp': '2025-09-30 22:15:37.746898', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:37.822110', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.20300790667533875, 'timestamp': '2025-09-30 22:15:37.829424', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:37.888617', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.22471986711025238, 'timestamp': '2025-09-30 22:15:37.893021', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:37.951411', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.0993969514966011, 'timestamp': '2025-09-30 22:15:37.961193', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:38.019412', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.15109314024448395, 'timestamp': '2025-09-30 22:15:38.023047', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:38.091613', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.20490922033786774, 'timestamp': '2025-09-30 22:15:38.099457', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:38.174414', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.1667960286140442, 'timestamp': '2025-09-30 22:15:38.178364', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:38.237128', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.19730299711227417, 'timestamp': '2025-09-30 22:15:38.241358', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:38.300225', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.1801101416349411, 'timestamp': '2025-09-30 22:15:38.313560', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:38.382819', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.16021893918514252, 'timestamp': '2025-09-30 22:15:38.391347', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:38.451030', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.15985965728759766, 'timestamp': '2025-09-30 22:15:38.454547', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:38.512824', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.20121756196022034, 'timestamp': '2025-09-30 22:15:38.516326', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:38.574589', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.17632608115673065, 'timestamp': '2025-09-30 22:15:38.577930', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:38.635472', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.21641570329666138, 'timestamp': '2025-09-30 22:15:38.643118', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:38.700136', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.17446941137313843, 'timestamp': '2025-09-30 22:15:38.716906', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:38.786008', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.15129882097244263, 'timestamp': '2025-09-30 22:15:38.789469', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:38.848539', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.22740304470062256, 'timestamp': '2025-09-30 22:15:38.859014', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:38.917755', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.1777876615524292, 'timestamp': '2025-09-30 22:15:38.927470', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:38.991059', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.22995387017726898, 'timestamp': '2025-09-30 22:15:38.996461', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:39.054426', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.1877669394016266, 'timestamp': '2025-09-30 22:15:39.058964', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:39.117806', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.11305664479732513, 'timestamp': '2025-09-30 22:15:39.122293', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.180638', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.19011546671390533, 'timestamp': '2025-09-30 22:15:39.191835', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:39.255165', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.23225651681423187, 'timestamp': '2025-09-30 22:15:39.260971', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:39.321090', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.17752093076705933, 'timestamp': '2025-09-30 22:15:39.331099', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.395873', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.15757642686367035, 'timestamp': '2025-09-30 22:15:39.402248', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:39.461481', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.21242980659008026, 'timestamp': '2025-09-30 22:15:39.470361', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.529341', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.1811305582523346, 'timestamp': '2025-09-30 22:15:39.535323', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.594573', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.24023482203483582, 'timestamp': '2025-09-30 22:15:39.610404', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.674448', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.12831711769104004, 'timestamp': '2025-09-30 22:15:39.680803', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:15:39.750434', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.23799090087413788, 'timestamp': '2025-09-30 22:15:39.759305', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:39.816542', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.1858164668083191, 'timestamp': '2025-09-30 22:15:39.821592', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:39.882129', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.23423650860786438, 'timestamp': '2025-09-30 22:15:39.891554', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:39.954983', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.18401002883911133, 'timestamp': '2025-09-30 22:15:39.959737', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:40.019130', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.25794553756713867, 'timestamp': '2025-09-30 22:15:40.032990', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.100111', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.19057530164718628, 'timestamp': '2025-09-30 22:15:40.104813', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.171515', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.13227079808712006, 'timestamp': '2025-09-30 22:15:40.175572', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.237837', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.14689701795578003, 'timestamp': '2025-09-30 22:15:40.240959', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:40.310834', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.09468831866979599, 'timestamp': '2025-09-30 22:15:40.320316', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.387933', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.15247349441051483, 'timestamp': '2025-09-30 22:15:40.407674', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:40.469469', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.14857174456119537, 'timestamp': '2025-09-30 22:15:40.488789', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:40.551619', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.17510297894477844, 'timestamp': '2025-09-30 22:15:40.558043', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.618224', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.132839173078537, 'timestamp': '2025-09-30 22:15:40.627381', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:40.698196', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.1048249900341034, 'timestamp': '2025-09-30 22:15:40.705556', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.778332', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.1469736397266388, 'timestamp': '2025-09-30 22:15:40.783545', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:40.866957', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.18463817238807678, 'timestamp': '2025-09-30 22:15:40.871998', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:40.938558', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.2216770350933075, 'timestamp': '2025-09-30 22:15:40.947581', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.007574', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.18052206933498383, 'timestamp': '2025-09-30 22:15:41.011609', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.071867', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.15659430623054504, 'timestamp': '2025-09-30 22:15:41.079813', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:41.158975', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.17115992307662964, 'timestamp': '2025-09-30 22:15:41.168944', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:41.237342', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.10662361234426498, 'timestamp': '2025-09-30 22:15:41.250931', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.313506', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.1734161078929901, 'timestamp': '2025-09-30 22:15:41.331666', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:41.404414', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.2514845132827759, 'timestamp': '2025-09-30 22:15:41.408848', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.469460', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.2098006010055542, 'timestamp': '2025-09-30 22:15:41.486094', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:41.561322', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.17903858423233032, 'timestamp': '2025-09-30 22:15:41.582331', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:41.639356', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.1879177987575531, 'timestamp': '2025-09-30 22:15:41.643404', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:41.705341', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.17874673008918762, 'timestamp': '2025-09-30 22:15:41.718972', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:41.796055', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.1593981683254242, 'timestamp': '2025-09-30 22:15:41.799757', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.857229', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.1529659926891327, 'timestamp': '2025-09-30 22:15:41.865090', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:41.930560', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.19542065262794495, 'timestamp': '2025-09-30 22:15:41.941712', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:42.000346', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.217673659324646, 'timestamp': '2025-09-30 22:15:42.003801', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:42.064867', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.15865102410316467, 'timestamp': '2025-09-30 22:15:42.069975', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:42.137851', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.1071506142616272, 'timestamp': '2025-09-30 22:15:42.145416', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:42.203068', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.15786951780319214, 'timestamp': '2025-09-30 22:15:42.206465', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:42.264891', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.14363522827625275, 'timestamp': '2025-09-30 22:15:42.268441', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:42.335699', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.22806769609451294, 'timestamp': '2025-09-30 22:15:42.339527', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:42.398082', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.2056294083595276, 'timestamp': '2025-09-30 22:15:42.404639', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:42.463012', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.1488521248102188, 'timestamp': '2025-09-30 22:15:42.468023', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:42.526243', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.14329615235328674, 'timestamp': '2025-09-30 22:15:42.533287', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:42.607663', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.17626900970935822, 'timestamp': '2025-09-30 22:15:42.615100', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:42.672863', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.14049571752548218, 'timestamp': '2025-09-30 22:15:42.684055', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:15:42.755025', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.16657358407974243, 'timestamp': '2025-09-30 22:15:42.759547', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:42.818517', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.1527271866798401, 'timestamp': '2025-09-30 22:15:42.822215', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:42.889776', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.10920697450637817, 'timestamp': '2025-09-30 22:15:42.895916', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:42.955452', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.1839708387851715, 'timestamp': '2025-09-30 22:15:42.971784', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:43.039864', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.13232137262821198, 'timestamp': '2025-09-30 22:15:43.054837', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:43.116808', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.194853276014328, 'timestamp': '2025-09-30 22:15:43.122478', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:43.184248', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.17749851942062378, 'timestamp': '2025-09-30 22:15:43.195639', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:43.270553', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.27063536643981934, 'timestamp': '2025-09-30 22:15:43.278448', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:43.338442', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.10984844714403152, 'timestamp': '2025-09-30 22:15:43.342087', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:43.401850', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.17796291410923004, 'timestamp': '2025-09-30 22:15:43.407822', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:43.466831', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.09819298982620239, 'timestamp': '2025-09-30 22:15:43.471316', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:43.534054', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.23932762444019318, 'timestamp': '2025-09-30 22:15:43.554917', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:43.630479', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.25137564539909363, 'timestamp': '2025-09-30 22:15:43.635642', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:43.694957', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.16941994428634644, 'timestamp': '2025-09-30 22:15:43.699844', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:43.758081', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.1378992646932602, 'timestamp': '2025-09-30 22:15:43.763767', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:43.823484', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.23311090469360352, 'timestamp': '2025-09-30 22:15:43.832648', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:43.891988', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.28956538438796997, 'timestamp': '2025-09-30 22:15:43.897909', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:43.957168', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.24710172414779663, 'timestamp': '2025-09-30 22:15:43.961196', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:44.020337', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.24240149557590485, 'timestamp': '2025-09-30 22:15:44.027669', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:44.087508', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.15730057656764984, 'timestamp': '2025-09-30 22:15:44.095473', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:44.153387', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.1739783138036728, 'timestamp': '2025-09-30 22:15:44.159732', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:44.225801', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.14543740451335907, 'timestamp': '2025-09-30 22:15:44.231492', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:44.289502', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.12942934036254883, 'timestamp': '2025-09-30 22:15:44.294408', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:44.361763', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.14000844955444336, 'timestamp': '2025-09-30 22:15:44.369323', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-30 22:15:44.808842', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:44.867718', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.12362444400787354, 'timestamp': '2025-09-30 22:15:44.871740', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:44.932400', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.17727993428707123, 'timestamp': '2025-09-30 22:15:44.947550', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.007552', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.1528063267469406, 'timestamp': '2025-09-30 22:15:45.021880', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.084318', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.14295801520347595, 'timestamp': '2025-09-30 22:15:45.092881', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:45.150820', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.10564041137695312, 'timestamp': '2025-09-30 22:15:45.155958', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.219948', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.17167237401008606, 'timestamp': '2025-09-30 22:15:45.223065', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:45.280757', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.2144218236207962, 'timestamp': '2025-09-30 22:15:45.286662', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.357809', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.08326268196105957, 'timestamp': '2025-09-30 22:15:45.365606', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:45.424365', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.20317919552326202, 'timestamp': '2025-09-30 22:15:45.429345', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:45.487890', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.27521812915802, 'timestamp': '2025-09-30 22:15:45.493763', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:45.554011', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.14794111251831055, 'timestamp': '2025-09-30 22:15:45.559145', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.618115', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.09009972959756851, 'timestamp': '2025-09-30 22:15:45.624974', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.683072', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.15672054886817932, 'timestamp': '2025-09-30 22:15:45.688519', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:45.747764', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.12348223477602005, 'timestamp': '2025-09-30 22:15:45.752623', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.810691', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.15633557736873627, 'timestamp': '2025-09-30 22:15:45.814726', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:45.881822', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.21967348456382751, 'timestamp': '2025-09-30 22:15:45.889567', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:45.946380', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.2090706080198288, 'timestamp': '2025-09-30 22:15:45.951201', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:46.016151', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.21161620318889618, 'timestamp': '2025-09-30 22:15:46.019700', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.078292', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.11267128586769104, 'timestamp': '2025-09-30 22:15:46.083277', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:46.141582', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.16562233865261078, 'timestamp': '2025-09-30 22:15:46.148729', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.213529', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.14392618834972382, 'timestamp': '2025-09-30 22:15:46.217348', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:46.276053', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.12546241283416748, 'timestamp': '2025-09-30 22:15:46.278939', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.336774', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.21546800434589386, 'timestamp': '2025-09-30 22:15:46.341243', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:46.398481', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.19152215123176575, 'timestamp': '2025-09-30 22:15:46.404975', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:46.462231', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.17766204476356506, 'timestamp': '2025-09-30 22:15:46.465638', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.535405', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.09222772717475891, 'timestamp': '2025-09-30 22:15:46.549272', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:46.609306', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.21096281707286835, 'timestamp': '2025-09-30 22:15:46.620103', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:46.687417', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.22521689534187317, 'timestamp': '2025-09-30 22:15:46.694628', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.753102', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.12675420939922333, 'timestamp': '2025-09-30 22:15:46.767807', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.826739', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.18527527153491974, 'timestamp': '2025-09-30 22:15:46.836808', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.905335', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.1526704579591751, 'timestamp': '2025-09-30 22:15:46.917293', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:46.976072', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.16419634222984314, 'timestamp': '2025-09-30 22:15:46.992105', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:47.054335', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.1891031414270401, 'timestamp': '2025-09-30 22:15:47.059464', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:47.117756', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.1906355768442154, 'timestamp': '2025-09-30 22:15:47.120688', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:47.186080', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.14360572397708893, 'timestamp': '2025-09-30 22:15:47.189904', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:47.254587', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.1815551370382309, 'timestamp': '2025-09-30 22:15:47.262959', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:47.321437', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.15318335592746735, 'timestamp': '2025-09-30 22:15:47.324463', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:47.382029', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.2293597310781479, 'timestamp': '2025-09-30 22:15:47.391512', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:47.454062', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.1447606384754181, 'timestamp': '2025-09-30 22:15:47.465635', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:47.530433', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.2197624295949936, 'timestamp': '2025-09-30 22:15:47.537492', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:47.603134', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.1111399382352829, 'timestamp': '2025-09-30 22:15:47.616212', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:47.684112', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.1599080115556717, 'timestamp': '2025-09-30 22:15:47.686748', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:47.749087', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.22115686535835266, 'timestamp': '2025-09-30 22:15:47.752255', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:47.811234', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.21778053045272827, 'timestamp': '2025-09-30 22:15:47.817633', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:47.873539', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.27931517362594604, 'timestamp': '2025-09-30 22:15:47.877234', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:47.938628', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.11990612745285034, 'timestamp': '2025-09-30 22:15:47.942387', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:48.000542', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.2497030645608902, 'timestamp': '2025-09-30 22:15:48.010619', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:48.068504', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.13120077550411224, 'timestamp': '2025-09-30 22:15:48.076938', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:48.133121', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.14893551170825958, 'timestamp': '2025-09-30 22:15:48.137156', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:48.193891', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.11640153080224991, 'timestamp': '2025-09-30 22:15:48.197657', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:48.256401', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.1946174055337906, 'timestamp': '2025-09-30 22:15:48.268686', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:48.332580', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.17557401955127716, 'timestamp': '2025-09-30 22:15:48.345420', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:48.403188', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.21069654822349548, 'timestamp': '2025-09-30 22:15:48.405903', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:48.465176', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.21193790435791016, 'timestamp': '2025-09-30 22:15:48.468046', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:48.525326', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.12104036659002304, 'timestamp': '2025-09-30 22:15:48.529252', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:48.602695', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.20897191762924194, 'timestamp': '2025-09-30 22:15:48.610985', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:48.667720', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.10484759509563446, 'timestamp': '2025-09-30 22:15:48.671084', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:48.737116', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.28223466873168945, 'timestamp': '2025-09-30 22:15:48.740632', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:48.806838', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.15686368942260742, 'timestamp': '2025-09-30 22:15:48.812163', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:48.870079', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.2715436816215515, 'timestamp': '2025-09-30 22:15:48.878604', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:48.936647', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.09446984529495239, 'timestamp': '2025-09-30 22:15:48.946972', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.005376', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.12449485808610916, 'timestamp': '2025-09-30 22:15:49.008858', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.071579', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.25528424978256226, 'timestamp': '2025-09-30 22:15:49.074241', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.131816', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.14119857549667358, 'timestamp': '2025-09-30 22:15:49.137938', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.195587', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.20133943855762482, 'timestamp': '2025-09-30 22:15:49.199512', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.257258', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.09769576787948608, 'timestamp': '2025-09-30 22:15:49.260872', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:49.319259', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.16707246005535126, 'timestamp': '2025-09-30 22:15:49.326884', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.393297', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.13316567242145538, 'timestamp': '2025-09-30 22:15:49.400845', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.457197', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.25597256422042847, 'timestamp': '2025-09-30 22:15:49.460751', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.518861', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.23862449824810028, 'timestamp': '2025-09-30 22:15:49.521220', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.578012', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.22202688455581665, 'timestamp': '2025-09-30 22:15:49.586563', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.643613', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.2394023835659027, 'timestamp': '2025-09-30 22:15:49.649625', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:49.715169', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.238351970911026, 'timestamp': '2025-09-30 22:15:49.717868', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.780479', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.13163617253303528, 'timestamp': '2025-09-30 22:15:49.784253', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:49.864949', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.15961067378520966, 'timestamp': '2025-09-30 22:15:49.869209', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:49.926020', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.10097511112689972, 'timestamp': '2025-09-30 22:15:49.931969', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:49.987680', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.21262946724891663, 'timestamp': '2025-09-30 22:15:49.990749', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:50.047148', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.15875642001628876, 'timestamp': '2025-09-30 22:15:50.051300', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.110135', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.1387036293745041, 'timestamp': '2025-09-30 22:15:50.113727', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.172850', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.24221722781658173, 'timestamp': '2025-09-30 22:15:50.179061', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:50.234917', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.20713762938976288, 'timestamp': '2025-09-30 22:15:50.238055', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.294268', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.23840871453285217, 'timestamp': '2025-09-30 22:15:50.297218', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:50.359574', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.15120942890644073, 'timestamp': '2025-09-30 22:15:50.362681', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.421082', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.1742478460073471, 'timestamp': '2025-09-30 22:15:50.427575', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:50.484056', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07151822000741959, 'timestamp': '2025-09-30 22:15:50.488060', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:50.546193', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.19045768678188324, 'timestamp': '2025-09-30 22:15:50.550203', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.606804', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.17280080914497375, 'timestamp': '2025-09-30 22:15:50.609334', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:50.668857', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.22777625918388367, 'timestamp': '2025-09-30 22:15:50.676602', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:50.733734', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.2690066993236542, 'timestamp': '2025-09-30 22:15:50.736338', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:50.792510', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.11085936427116394, 'timestamp': '2025-09-30 22:15:50.799785', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:50.856959', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.20098012685775757, 'timestamp': '2025-09-30 22:15:50.862372', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:50.922133', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.1704084426164627, 'timestamp': '2025-09-30 22:15:50.928612', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:50.985789', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.2049131542444229, 'timestamp': '2025-09-30 22:15:50.988232', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.045011', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.11828447878360748, 'timestamp': '2025-09-30 22:15:51.047845', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:51.105781', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.1435014009475708, 'timestamp': '2025-09-30 22:15:51.112980', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:51.178938', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.19025757908821106, 'timestamp': '2025-09-30 22:15:51.186007', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:51.243525', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09611277282238007, 'timestamp': '2025-09-30 22:15:51.247110', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.304284', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.24845746159553528, 'timestamp': '2025-09-30 22:15:51.306988', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.367200', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.1424786001443863, 'timestamp': '2025-09-30 22:15:51.370063', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:51.426120', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.19812747836112976, 'timestamp': '2025-09-30 22:15:51.433225', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:51.494654', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.2198619842529297, 'timestamp': '2025-09-30 22:15:51.497373', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.554059', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.1004205048084259, 'timestamp': '2025-09-30 22:15:51.556988', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:51.615506', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.19253598153591156, 'timestamp': '2025-09-30 22:15:51.618974', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.676254', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.10642591118812561, 'timestamp': '2025-09-30 22:15:51.682174', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:51.737715', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.2418426126241684, 'timestamp': '2025-09-30 22:15:51.740880', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:51.799800', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.17634589970111847, 'timestamp': '2025-09-30 22:15:51.802348', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:51.863449', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.10850326716899872, 'timestamp': '2025-09-30 22:15:51.866168', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:51.922698', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.12044636160135269, 'timestamp': '2025-09-30 22:15:51.928897', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:51.986074', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.15309131145477295, 'timestamp': '2025-09-30 22:15:51.989150', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:52.046123', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.18220461905002594, 'timestamp': '2025-09-30 22:15:52.050789', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:52.107381', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.10955050587654114, 'timestamp': '2025-09-30 22:15:52.112594', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:52.170599', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.21236969530582428, 'timestamp': '2025-09-30 22:15:52.178834', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:52.236473', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.13788771629333496, 'timestamp': '2025-09-30 22:15:52.239321', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:52.297301', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.19583767652511597, 'timestamp': '2025-09-30 22:15:52.306527', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:52.364955', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.15482734143733978, 'timestamp': '2025-09-30 22:15:52.369343', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:52.425967', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.2918016016483307, 'timestamp': '2025-09-30 22:15:52.431931', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:52.488131', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.17466934025287628, 'timestamp': '2025-09-30 22:15:52.496265', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:52.553283', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.16233746707439423, 'timestamp': '2025-09-30 22:15:52.556921', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:52.613259', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.14640142023563385, 'timestamp': '2025-09-30 22:15:52.615895', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:52.672665', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.1564459204673767, 'timestamp': '2025-09-30 22:15:52.678690', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:52.735419', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.1422421783208847, 'timestamp': '2025-09-30 22:15:52.738419', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:52.794665', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.1946597844362259, 'timestamp': '2025-09-30 22:15:52.797026', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:52.858419', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.1359083652496338, 'timestamp': '2025-09-30 22:15:52.865058', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:52.925201', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.14671821892261505, 'timestamp': '2025-09-30 22:15:52.932103', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:52.990665', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.14471271634101868, 'timestamp': '2025-09-30 22:15:52.993050', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:53.051256', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.18015728890895844, 'timestamp': '2025-09-30 22:15:53.056263', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:53.113268', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.17859406769275665, 'timestamp': '2025-09-30 22:15:53.115733', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:53.172845', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.13900427520275116, 'timestamp': '2025-09-30 22:15:53.178603', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:53.235422', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.1399882584810257, 'timestamp': '2025-09-30 22:15:53.238151', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.295682', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.2746448218822479, 'timestamp': '2025-09-30 22:15:53.298847', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.355784', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.13440388441085815, 'timestamp': '2025-09-30 22:15:53.358286', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:53.415869', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.1532822847366333, 'timestamp': '2025-09-30 22:15:53.422189', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.478307', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.21225740015506744, 'timestamp': '2025-09-30 22:15:53.494419', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:53.552944', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.13671012222766876, 'timestamp': '2025-09-30 22:15:53.555761', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.620251', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.18658481538295746, 'timestamp': '2025-09-30 22:15:53.622496', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.679762', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.1813506782054901, 'timestamp': '2025-09-30 22:15:53.686272', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:53.742249', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.1517525315284729, 'timestamp': '2025-09-30 22:15:53.744987', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:53.801444', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11461358517408371, 'timestamp': '2025-09-30 22:15:53.805254', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:53.862077', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.12179894745349884, 'timestamp': '2025-09-30 22:15:53.865084', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:53.929918', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.2166222482919693, 'timestamp': '2025-09-30 22:15:53.935905', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:15:53.992874', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.2232293039560318, 'timestamp': '2025-09-30 22:15:53.997108', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.060137', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.13904733955860138, 'timestamp': '2025-09-30 22:15:54.063202', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:54.120185', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.352585107088089, 'timestamp': '2025-09-30 22:15:54.124572', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.182316', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.20249412953853607, 'timestamp': '2025-09-30 22:15:54.188678', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:54.245674', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.17987790703773499, 'timestamp': '2025-09-30 22:15:54.248176', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.308148', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.22186729311943054, 'timestamp': '2025-09-30 22:15:54.319598', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.379461', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.16976624727249146, 'timestamp': '2025-09-30 22:15:54.384015', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.442449', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.27411720156669617, 'timestamp': '2025-09-30 22:15:54.448602', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.508916', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.19899484515190125, 'timestamp': '2025-09-30 22:15:54.512016', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.570180', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.18266580998897552, 'timestamp': '2025-09-30 22:15:54.577456', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.641233', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.308744341135025, 'timestamp': '2025-09-30 22:15:54.645483', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.706399', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.14451198279857635, 'timestamp': '2025-09-30 22:15:54.714790', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.776824', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.155656099319458, 'timestamp': '2025-09-30 22:15:54.779761', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.842112', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.12888900935649872, 'timestamp': '2025-09-30 22:15:54.854747', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:54.927371', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.18146489560604095, 'timestamp': '2025-09-30 22:15:54.936049', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:54.998442', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.2122795730829239, 'timestamp': '2025-09-30 22:15:55.005234', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:55.063399', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.21736197173595428, 'timestamp': '2025-09-30 22:15:55.066419', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.124097', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.2305467277765274, 'timestamp': '2025-09-30 22:15:55.126139', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:55.186062', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13649167120456696, 'timestamp': '2025-09-30 22:15:55.191525', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.251395', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.18672169744968414, 'timestamp': '2025-09-30 22:15:55.257747', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:55.314890', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.1831480860710144, 'timestamp': '2025-09-30 22:15:55.317618', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.375216', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.13930848240852356, 'timestamp': '2025-09-30 22:15:55.377638', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:55.434983', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.25384601950645447, 'timestamp': '2025-09-30 22:15:55.437498', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:55.494233', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.2473173886537552, 'timestamp': '2025-09-30 22:15:55.500458', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:55.557186', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.29342690110206604, 'timestamp': '2025-09-30 22:15:55.560266', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:55.618179', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.20944741368293762, 'timestamp': '2025-09-30 22:15:55.620635', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.679194', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.21565066277980804, 'timestamp': '2025-09-30 22:15:55.682829', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:55.740822', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.21312259137630463, 'timestamp': '2025-09-30 22:15:55.746994', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:55.810150', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.09627147018909454, 'timestamp': '2025-09-30 22:15:55.812444', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.871549', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.12056723237037659, 'timestamp': '2025-09-30 22:15:55.874531', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:55.933948', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.16240160167217255, 'timestamp': '2025-09-30 22:15:55.937633', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:55.996742', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.22405965626239777, 'timestamp': '2025-09-30 22:15:56.003309', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:56.063480', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.17629991471767426, 'timestamp': '2025-09-30 22:15:56.066653', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.123796', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.15812209248542786, 'timestamp': '2025-09-30 22:15:56.126173', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:56.187973', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.17163006961345673, 'timestamp': '2025-09-30 22:15:56.191161', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:56.249663', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.23661138117313385, 'timestamp': '2025-09-30 22:15:56.256764', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:56.312940', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.19670824706554413, 'timestamp': '2025-09-30 22:15:56.317215', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.376324', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.224590003490448, 'timestamp': '2025-09-30 22:15:56.379102', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.440686', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.17976613342761993, 'timestamp': '2025-09-30 22:15:56.452023', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.512123', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.15624305605888367, 'timestamp': '2025-09-30 22:15:56.518387', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:56.576159', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.16561123728752136, 'timestamp': '2025-09-30 22:15:56.578751', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.638036', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.14695963263511658, 'timestamp': '2025-09-30 22:15:56.640400', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:56.697144', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.17405101656913757, 'timestamp': '2025-09-30 22:15:56.704315', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.761565', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.17995144426822662, 'timestamp': '2025-09-30 22:15:56.768498', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:56.824211', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.2669578492641449, 'timestamp': '2025-09-30 22:15:56.828147', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:56.886090', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.21915844082832336, 'timestamp': '2025-09-30 22:15:56.888935', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:56.956616', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.170378640294075, 'timestamp': '2025-09-30 22:15:56.959225', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:57.018186', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.22767595946788788, 'timestamp': '2025-09-30 22:15:57.031421', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.088893', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.16752728819847107, 'timestamp': '2025-09-30 22:15:57.093286', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.151969', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.13409750163555145, 'timestamp': '2025-09-30 22:15:57.154905', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.211286', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.1303279995918274, 'timestamp': '2025-09-30 22:15:57.214856', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.271258', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.143915057182312, 'timestamp': '2025-09-30 22:15:57.281662', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:57.338938', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.11984595656394958, 'timestamp': '2025-09-30 22:15:57.342492', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:57.399508', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.21037250757217407, 'timestamp': '2025-09-30 22:15:57.402643', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.460887', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.1353965848684311, 'timestamp': '2025-09-30 22:15:57.463206', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.519088', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.1347123384475708, 'timestamp': '2025-09-30 22:15:57.526010', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.587195', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.0987251028418541, 'timestamp': '2025-09-30 22:15:57.591657', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:15:57.652864', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.09746391326189041, 'timestamp': '2025-09-30 22:15:57.655709', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.713854', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.22083643078804016, 'timestamp': '2025-09-30 22:15:57.716496', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.773930', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.13813945651054382, 'timestamp': '2025-09-30 22:15:57.780302', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.837944', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.21090157330036163, 'timestamp': '2025-09-30 22:15:57.840161', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:57.896448', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.2634550929069519, 'timestamp': '2025-09-30 22:15:57.900325', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:57.959149', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.1853800117969513, 'timestamp': '2025-09-30 22:15:57.968364', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.035255', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.19433985650539398, 'timestamp': '2025-09-30 22:15:58.043451', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.103572', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.23765601217746735, 'timestamp': '2025-09-30 22:15:58.107125', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.163500', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.1589784026145935, 'timestamp': '2025-09-30 22:15:58.166134', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:58.223950', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.13123714923858643, 'timestamp': '2025-09-30 22:15:58.227220', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:58.283411', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.1466616541147232, 'timestamp': '2025-09-30 22:15:58.289863', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.354739', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.10001461952924728, 'timestamp': '2025-09-30 22:15:58.357197', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.414059', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.24297596514225006, 'timestamp': '2025-09-30 22:15:58.416317', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.472757', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.20485439896583557, 'timestamp': '2025-09-30 22:15:58.475735', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:58.532317', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.15905922651290894, 'timestamp': '2025-09-30 22:15:58.538123', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.593445', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.16118112206459045, 'timestamp': '2025-09-30 22:15:58.595872', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.652465', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.1550399363040924, 'timestamp': '2025-09-30 22:15:58.655085', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.712097', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.19985976815223694, 'timestamp': '2025-09-30 22:15:58.717431', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.775248', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.14553171396255493, 'timestamp': '2025-09-30 22:15:58.781568', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:58.837393', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.24231815338134766, 'timestamp': '2025-09-30 22:15:58.839924', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:58.898984', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.1539517343044281, 'timestamp': '2025-09-30 22:15:58.902084', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:58.959968', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.17268849909305573, 'timestamp': '2025-09-30 22:15:58.977462', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:59.036834', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.13809680938720703, 'timestamp': '2025-09-30 22:15:59.043476', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:59.099914', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.19978690147399902, 'timestamp': '2025-09-30 22:15:59.104879', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:59.162294', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.1681338995695114, 'timestamp': '2025-09-30 22:15:59.166456', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:59.223477', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.2736402153968811, 'timestamp': '2025-09-30 22:15:59.234630', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:15:59.291552', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.14580510556697845, 'timestamp': '2025-09-30 22:15:59.298498', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:15:59.356177', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.16597189009189606, 'timestamp': '2025-09-30 22:15:59.368898', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:59.427199', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.1542571783065796, 'timestamp': '2025-09-30 22:15:59.431916', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:59.494199', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.18104104697704315, 'timestamp': '2025-09-30 22:15:59.503101', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:59.562023', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2849101722240448, 'timestamp': '2025-09-30 22:15:59.570319', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:59.642621', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.10222508758306503, 'timestamp': '2025-09-30 22:15:59.650741', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:15:59.721721', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.17173929512500763, 'timestamp': '2025-09-30 22:15:59.727778', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:15:59.786360', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.1899384707212448, 'timestamp': '2025-09-30 22:15:59.794794', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:15:59.855074', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.21629151701927185, 'timestamp': '2025-09-30 22:15:59.863120', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:15:59.926711', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.11011464148759842, 'timestamp': '2025-09-30 22:15:59.930155', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:15:59.999048', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.11370294541120529, 'timestamp': '2025-09-30 22:16:00.004366', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:00.069119', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.21692608296871185, 'timestamp': '2025-09-30 22:16:00.073757', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.134511', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.1700606793165207, 'timestamp': '2025-09-30 22:16:00.140441', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:00.203028', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.24808275699615479, 'timestamp': '2025-09-30 22:16:00.206269', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.273279', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.1946779042482376, 'timestamp': '2025-09-30 22:16:00.276802', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:00.339916', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.14084561169147491, 'timestamp': '2025-09-30 22:16:00.344412', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:00.404002', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.19314783811569214, 'timestamp': '2025-09-30 22:16:00.418754', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.480957', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.18072767555713654, 'timestamp': '2025-09-30 22:16:00.484809', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:00.543644', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.21639271080493927, 'timestamp': '2025-09-30 22:16:00.546930', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:00.615272', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.1461937576532364, 'timestamp': '2025-09-30 22:16:00.624633', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.686700', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.261225163936615, 'timestamp': '2025-09-30 22:16:00.698356', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:00.755377', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.09480614215135574, 'timestamp': '2025-09-30 22:16:00.770304', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.840410', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.22320210933685303, 'timestamp': '2025-09-30 22:16:00.847174', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:00.911723', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.10154245793819427, 'timestamp': '2025-09-30 22:16:00.919228', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:00.977228', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.20020338892936707, 'timestamp': '2025-09-30 22:16:00.986355', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:01.049433', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.2448507696390152, 'timestamp': '2025-09-30 22:16:01.052794', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:01.114641', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.19329112768173218, 'timestamp': '2025-09-30 22:16:01.117588', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:01.181425', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.17187587916851044, 'timestamp': '2025-09-30 22:16:01.184381', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:01.240860', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.1541084200143814, 'timestamp': '2025-09-30 22:16:01.255442', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:01.315858', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.13582320511341095, 'timestamp': '2025-09-30 22:16:01.325265', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:01.383593', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.09604931622743607, 'timestamp': '2025-09-30 22:16:01.392141', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:01.464149', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.13682018220424652, 'timestamp': '2025-09-30 22:16:01.467560', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:01.532399', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.08708804845809937, 'timestamp': '2025-09-30 22:16:01.538292', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:01.597362', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.1668732464313507, 'timestamp': '2025-09-30 22:16:01.602647', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:01.659063', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.19336652755737305, 'timestamp': '2025-09-30 22:16:01.664899', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:01.722133', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.22561389207839966, 'timestamp': '2025-09-30 22:16:01.725301', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:01.784715', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.14658799767494202, 'timestamp': '2025-09-30 22:16:01.791351', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:01.855939', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.1842603087425232, 'timestamp': '2025-09-30 22:16:01.859539', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:01.923040', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.1166205108165741, 'timestamp': '2025-09-30 22:16:01.929388', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:16:15.336300', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 9795.964899152783, 'timestamp': '2025-09-30 22:16:15.346546', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:15.421016', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.18539205193519592, 'timestamp': '2025-09-30 22:16:15.439384', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:15.504728', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.1953744888305664, 'timestamp': '2025-09-30 22:16:15.512411', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:15.569910', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.12920260429382324, 'timestamp': '2025-09-30 22:16:15.573398', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:15.630228', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.2595469355583191, 'timestamp': '2025-09-30 22:16:15.633668', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:15.691167', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.23633542656898499, 'timestamp': '2025-09-30 22:16:15.695499', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:15.752409', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.1357700526714325, 'timestamp': '2025-09-30 22:16:15.767110', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:15.824016', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.19595566391944885, 'timestamp': '2025-09-30 22:16:15.828224', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:15.885862', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.2059224545955658, 'timestamp': '2025-09-30 22:16:15.889173', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:15.951828', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.16797488927841187, 'timestamp': '2025-09-30 22:16:15.962430', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:16.025595', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08996470272541046, 'timestamp': '2025-09-30 22:16:16.031807', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:16.088815', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.19046245515346527, 'timestamp': '2025-09-30 22:16:16.091860', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:16.148931', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.20432838797569275, 'timestamp': '2025-09-30 22:16:16.152934', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:16.211247', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.09079128503799438, 'timestamp': '2025-09-30 22:16:16.214250', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:16.271225', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.154728963971138, 'timestamp': '2025-09-30 22:16:16.280075', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:16.340835', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.1858060359954834, 'timestamp': '2025-09-30 22:16:16.346020', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:16.410264', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.1041010320186615, 'timestamp': '2025-09-30 22:16:16.413901', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:16.470705', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.21300958096981049, 'timestamp': '2025-09-30 22:16:16.475230', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:16.532611', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.15329664945602417, 'timestamp': '2025-09-30 22:16:16.546441', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:16.603596', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.12013357877731323, 'timestamp': '2025-09-30 22:16:16.606890', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:16.671430', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.0819435715675354, 'timestamp': '2025-09-30 22:16:16.680191', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:16.737983', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.20047882199287415, 'timestamp': '2025-09-30 22:16:16.740745', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:16.798222', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.17032267153263092, 'timestamp': '2025-09-30 22:16:16.810478', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:16.872296', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.2558896243572235, 'timestamp': '2025-09-30 22:16:16.876925', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:16.943170', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.20101504027843475, 'timestamp': '2025-09-30 22:16:16.947408', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:17.016996', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.1379796862602234, 'timestamp': '2025-09-30 22:16:17.032011', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:17.091568', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.21310798823833466, 'timestamp': '2025-09-30 22:16:17.099757', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:17.157090', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.21373285353183746, 'timestamp': '2025-09-30 22:16:17.160067', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:17.217771', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.2936854660511017, 'timestamp': '2025-09-30 22:16:17.221611', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:17.292616', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.17772875726222992, 'timestamp': '2025-09-30 22:16:17.295818', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:17.353972', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.06335792690515518, 'timestamp': '2025-09-30 22:16:17.364241', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:17.421109', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.29315146803855896, 'timestamp': '2025-09-30 22:16:17.431375', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:17.489241', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.1929667890071869, 'timestamp': '2025-09-30 22:16:17.492644', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:17.567713', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.14570355415344238, 'timestamp': '2025-09-30 22:16:17.581417', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:17.640388', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.1433734893798828, 'timestamp': '2025-09-30 22:16:17.651096', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:17.708893', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.13705487549304962, 'timestamp': '2025-09-30 22:16:17.712006', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:17.781037', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.13636599481105804, 'timestamp': '2025-09-30 22:16:17.786545', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:17.845747', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.13492299616336823, 'timestamp': '2025-09-30 22:16:17.851005', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:17.921806', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.173582524061203, 'timestamp': '2025-09-30 22:16:17.929120', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:17.995420', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.14821070432662964, 'timestamp': '2025-09-30 22:16:17.998889', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.057176', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.25392037630081177, 'timestamp': '2025-09-30 22:16:18.060546', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.122771', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.211232990026474, 'timestamp': '2025-09-30 22:16:18.126570', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.187894', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.13998226821422577, 'timestamp': '2025-09-30 22:16:18.206868', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:18.263851', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.26357752084732056, 'timestamp': '2025-09-30 22:16:18.282321', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:18.345991', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.1384793519973755, 'timestamp': '2025-09-30 22:16:18.353877', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:18.411982', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.1757144033908844, 'timestamp': '2025-09-30 22:16:18.415160', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:18.485159', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.15909992158412933, 'timestamp': '2025-09-30 22:16:18.492605', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.549831', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.16892306506633759, 'timestamp': '2025-09-30 22:16:18.564079', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.629469', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.1649026721715927, 'timestamp': '2025-09-30 22:16:18.633004', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:18.690751', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.2289397120475769, 'timestamp': '2025-09-30 22:16:18.693880', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:18.751648', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.178055539727211, 'timestamp': '2025-09-30 22:16:18.758465', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:18.815381', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.13880984485149384, 'timestamp': '2025-09-30 22:16:18.819300', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:18.877091', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.16991370916366577, 'timestamp': '2025-09-30 22:16:18.880020', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:18.945849', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.14817482233047485, 'timestamp': '2025-09-30 22:16:18.950948', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:19.014759', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.13253740966320038, 'timestamp': '2025-09-30 22:16:19.021850', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:19.078627', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.13159291446208954, 'timestamp': '2025-09-30 22:16:19.082499', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:19.145400', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.1510346531867981, 'timestamp': '2025-09-30 22:16:19.149023', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:19.206056', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.17794807255268097, 'timestamp': '2025-09-30 22:16:19.209862', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:19.277754', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.13776026666164398, 'timestamp': '2025-09-30 22:16:19.283651', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:19.347576', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.23956620693206787, 'timestamp': '2025-09-30 22:16:19.355571', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:19.415467', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.11397206038236618, 'timestamp': '2025-09-30 22:16:19.418083', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:19.493982', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.10768309980630875, 'timestamp': '2025-09-30 22:16:19.497416', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:19.566691', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.13857609033584595, 'timestamp': '2025-09-30 22:16:19.574206', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:19.642460', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.3137945532798767, 'timestamp': '2025-09-30 22:16:19.645413', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:19.706138', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.187592014670372, 'timestamp': '2025-09-30 22:16:19.714444', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:19.778178', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.24193954467773438, 'timestamp': '2025-09-30 22:16:19.781096', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:16:19.838322', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.19554990530014038, 'timestamp': '2025-09-30 22:16:19.844908', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:19.901048', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.221304252743721, 'timestamp': '2025-09-30 22:16:19.905303', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:19.967908', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.14276881515979767, 'timestamp': '2025-09-30 22:16:19.971615', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:20.030062', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.1011926680803299, 'timestamp': '2025-09-30 22:16:20.033361', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:20.098152', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.1844145506620407, 'timestamp': '2025-09-30 22:16:20.104779', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:20.169844', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.12754090130329132, 'timestamp': '2025-09-30 22:16:20.179451', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:20.236968', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.21332934498786926, 'timestamp': '2025-09-30 22:16:20.239886', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:20.302002', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.18620692193508148, 'timestamp': '2025-09-30 22:16:20.304971', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:20.362218', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.17492520809173584, 'timestamp': '2025-09-30 22:16:20.368945', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:20.425875', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.11265099048614502, 'timestamp': '2025-09-30 22:16:20.429099', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:20.486736', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.14213572442531586, 'timestamp': '2025-09-30 22:16:20.490616', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:20.548469', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.2701945900917053, 'timestamp': '2025-09-30 22:16:20.551754', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:16:20.621440', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.21146562695503235, 'timestamp': '2025-09-30 22:16:20.634740', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:20.691446', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.23246364295482635, 'timestamp': '2025-09-30 22:16:20.694358', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:20.756608', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.269089013338089, 'timestamp': '2025-09-30 22:16:20.763871', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:20.821382', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.17559269070625305, 'timestamp': '2025-09-30 22:16:20.825015', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:20.883073', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.23035436868667603, 'timestamp': '2025-09-30 22:16:20.889992', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:20.946247', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.17753443121910095, 'timestamp': '2025-09-30 22:16:20.949025', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:21.006167', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.18753692507743835, 'timestamp': '2025-09-30 22:16:21.008890', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:21.065571', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.19320257008075714, 'timestamp': '2025-09-30 22:16:21.068526', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.126098', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.08254119008779526, 'timestamp': '2025-09-30 22:16:21.132428', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.188444', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.3031103014945984, 'timestamp': '2025-09-30 22:16:21.191159', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:21.247358', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.19972127676010132, 'timestamp': '2025-09-30 22:16:21.251826', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.309431', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.12729574739933014, 'timestamp': '2025-09-30 22:16:21.312921', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.369595', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.1628928929567337, 'timestamp': '2025-09-30 22:16:21.376421', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:21.432709', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.1323581486940384, 'timestamp': '2025-09-30 22:16:21.435161', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:21.496184', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.14332976937294006, 'timestamp': '2025-09-30 22:16:21.499622', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:21.560753', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.23814931511878967, 'timestamp': '2025-09-30 22:16:21.563676', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:21.620238', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.1929522156715393, 'timestamp': '2025-09-30 22:16:21.626751', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.682072', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.14154547452926636, 'timestamp': '2025-09-30 22:16:21.689721', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:21.751456', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.15072904527187347, 'timestamp': '2025-09-30 22:16:21.756058', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:21.812868', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.09911486506462097, 'timestamp': '2025-09-30 22:16:21.821759', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:21.883970', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.1961294263601303, 'timestamp': '2025-09-30 22:16:21.890751', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:21.948978', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.17229890823364258, 'timestamp': '2025-09-30 22:16:21.952152', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.021275', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.17996753752231598, 'timestamp': '2025-09-30 22:16:22.024664', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:22.082772', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.11414690315723419, 'timestamp': '2025-09-30 22:16:22.087432', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.153530', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.1548422873020172, 'timestamp': '2025-09-30 22:16:22.160115', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.216732', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.27915576100349426, 'timestamp': '2025-09-30 22:16:22.221076', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:22.278033', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.1173524260520935, 'timestamp': '2025-09-30 22:16:22.282132', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:22.339847', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.18796299397945404, 'timestamp': '2025-09-30 22:16:22.355453', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:22.411867', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.17415720224380493, 'timestamp': '2025-09-30 22:16:22.418858', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.476743', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.23945403099060059, 'timestamp': '2025-09-30 22:16:22.480198', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:22.554047', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.14450427889823914, 'timestamp': '2025-09-30 22:16:22.563044', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:22.620579', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.22812111675739288, 'timestamp': '2025-09-30 22:16:22.623410', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:22.687058', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.14970599114894867, 'timestamp': '2025-09-30 22:16:22.696328', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:22.765474', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.13265293836593628, 'timestamp': '2025-09-30 22:16:22.776309', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:22.836046', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.16514669358730316, 'timestamp': '2025-09-30 22:16:22.852036', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.917386', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.21097345650196075, 'timestamp': '2025-09-30 22:16:22.921167', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:22.979444', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.2511991560459137, 'timestamp': '2025-09-30 22:16:22.986113', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:23.047191', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.2516988515853882, 'timestamp': '2025-09-30 22:16:23.050009', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:23.107552', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.18099768459796906, 'timestamp': '2025-09-30 22:16:23.110107', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.168776', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.14210602641105652, 'timestamp': '2025-09-30 22:16:23.175422', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:23.232473', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.18352599442005157, 'timestamp': '2025-09-30 22:16:23.243835', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:23.307242', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.18285056948661804, 'timestamp': '2025-09-30 22:16:23.311856', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:23.380971', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.2239748239517212, 'timestamp': '2025-09-30 22:16:23.384994', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.449337', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.19293424487113953, 'timestamp': '2025-09-30 22:16:23.462383', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.523951', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.1629379391670227, 'timestamp': '2025-09-30 22:16:23.535557', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.600387', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.1989610344171524, 'timestamp': '2025-09-30 22:16:23.604286', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:23.677754', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.1916336715221405, 'timestamp': '2025-09-30 22:16:23.681991', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:23.748781', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.11919672042131424, 'timestamp': '2025-09-30 22:16:23.752828', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.819582', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.1012919619679451, 'timestamp': '2025-09-30 22:16:23.825971', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.883540', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.14230594038963318, 'timestamp': '2025-09-30 22:16:23.886364', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:23.944452', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.22744031250476837, 'timestamp': '2025-09-30 22:16:23.947635', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:24.010177', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.1649547517299652, 'timestamp': '2025-09-30 22:16:24.012993', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:24.077527', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.1925542950630188, 'timestamp': '2025-09-30 22:16:24.096863', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.153237', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.12100035697221756, 'timestamp': '2025-09-30 22:16:24.157027', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:24.219752', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.13151387870311737, 'timestamp': '2025-09-30 22:16:24.222730', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.284341', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.11396795511245728, 'timestamp': '2025-09-30 22:16:24.287626', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.344573', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.21064835786819458, 'timestamp': '2025-09-30 22:16:24.359327', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:24.428976', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.1796943098306656, 'timestamp': '2025-09-30 22:16:24.432899', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.492339', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.1962633579969406, 'timestamp': '2025-09-30 22:16:24.495950', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.552674', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.2047935277223587, 'timestamp': '2025-09-30 22:16:24.555522', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.612701', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.1563263088464737, 'timestamp': '2025-09-30 22:16:24.621755', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.688992', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.15537972748279572, 'timestamp': '2025-09-30 22:16:24.694500', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:24.752477', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.12386949360370636, 'timestamp': '2025-09-30 22:16:24.763571', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:24.822588', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.28130751848220825, 'timestamp': '2025-09-30 22:16:24.837802', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:24.902928', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.19731469452381134, 'timestamp': '2025-09-30 22:16:24.909432', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:24.972380', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.18150070309638977, 'timestamp': '2025-09-30 22:16:24.975817', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:25.048989', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.12921707332134247, 'timestamp': '2025-09-30 22:16:25.054043', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:25.114449', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.1937410682439804, 'timestamp': '2025-09-30 22:16:25.117819', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:25.174686', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.22655780613422394, 'timestamp': '2025-09-30 22:16:25.198287', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:25.257658', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.11091888695955276, 'timestamp': '2025-09-30 22:16:25.260772', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:25.316744', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.1634877324104309, 'timestamp': '2025-09-30 22:16:25.319960', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:25.380749', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.06205414980649948, 'timestamp': '2025-09-30 22:16:25.383360', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:25.439008', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.2755235433578491, 'timestamp': '2025-09-30 22:16:25.445240', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:25.504139', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.1859551966190338, 'timestamp': '2025-09-30 22:16:25.507015', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:25.563733', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.13943342864513397, 'timestamp': '2025-09-30 22:16:25.573184', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:25.634412', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.1296602487564087, 'timestamp': '2025-09-30 22:16:25.638442', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:25.697332', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.18484379351139069, 'timestamp': '2025-09-30 22:16:25.704362', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:25.769477', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.14178678393363953, 'timestamp': '2025-09-30 22:16:25.772306', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:25.836727', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.1305728703737259, 'timestamp': '2025-09-30 22:16:25.840066', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:25.900201', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.14274118840694427, 'timestamp': '2025-09-30 22:16:25.909031', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:25.976758', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.09139175713062286, 'timestamp': '2025-09-30 22:16:25.993602', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.056971', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.2265893965959549, 'timestamp': '2025-09-30 22:16:26.063623', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.125739', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.1409740447998047, 'timestamp': '2025-09-30 22:16:26.128451', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:26.186199', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.1641870141029358, 'timestamp': '2025-09-30 22:16:26.189558', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:26.256133', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.099612295627594, 'timestamp': '2025-09-30 22:16:26.263005', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.321695', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.1215301901102066, 'timestamp': '2025-09-30 22:16:26.325443', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.383221', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.15842391550540924, 'timestamp': '2025-09-30 22:16:26.386785', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:26.456510', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.2636915147304535, 'timestamp': '2025-09-30 22:16:26.459846', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:26.523079', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.26717203855514526, 'timestamp': '2025-09-30 22:16:26.530303', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:26.591997', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.20284198224544525, 'timestamp': '2025-09-30 22:16:26.597765', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:26.655132', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.26385679841041565, 'timestamp': '2025-09-30 22:16:26.658485', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.721348', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.15691109001636505, 'timestamp': '2025-09-30 22:16:26.725179', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.784698', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.1781059056520462, 'timestamp': '2025-09-30 22:16:26.791069', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.851191', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.10001068562269211, 'timestamp': '2025-09-30 22:16:26.854224', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.910515', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.19036267697811127, 'timestamp': '2025-09-30 22:16:26.915442', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:26.975015', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.19811590015888214, 'timestamp': '2025-09-30 22:16:26.978879', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:27.036265', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.2860717177391052, 'timestamp': '2025-09-30 22:16:27.042884', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:27.099153', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.12593792378902435, 'timestamp': '2025-09-30 22:16:27.107928', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:27.165032', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.25512567162513733, 'timestamp': '2025-09-30 22:16:27.167776', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:27.224767', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.12015215307474136, 'timestamp': '2025-09-30 22:16:27.231279', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:27.292231', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.1814926564693451, 'timestamp': '2025-09-30 22:16:27.304073', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:27.360831', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.15673315525054932, 'timestamp': '2025-09-30 22:16:27.369566', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:27.433069', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.18829232454299927, 'timestamp': '2025-09-30 22:16:27.436919', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:27.497002', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.1735275834798813, 'timestamp': '2025-09-30 22:16:27.500135', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:27.563864', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.12732750177383423, 'timestamp': '2025-09-30 22:16:27.572231', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:27.630484', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.2091558277606964, 'timestamp': '2025-09-30 22:16:27.641648', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:27.703909', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.1688602715730667, 'timestamp': '2025-09-30 22:16:27.707387', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:27.765259', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.1667671501636505, 'timestamp': '2025-09-30 22:16:27.768229', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:27.830449', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.20513387024402618, 'timestamp': '2025-09-30 22:16:27.842406', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:27.900118', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.13654950261116028, 'timestamp': '2025-09-30 22:16:27.903970', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:27.969940', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.17235475778579712, 'timestamp': '2025-09-30 22:16:27.972911', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:28.029913', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.19437722861766815, 'timestamp': '2025-09-30 22:16:28.033763', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:28.092520', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.19765888154506683, 'timestamp': '2025-09-30 22:16:28.104744', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:28.161238', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.18446457386016846, 'timestamp': '2025-09-30 22:16:28.164120', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:28.221063', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.19258850812911987, 'timestamp': '2025-09-30 22:16:28.224980', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.283291', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.13110189139842987, 'timestamp': '2025-09-30 22:16:28.299059', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:28.363327', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.2045535445213318, 'timestamp': '2025-09-30 22:16:28.385531', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.442633', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.08291368186473846, 'timestamp': '2025-09-30 22:16:28.446172', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.504156', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.15062196552753448, 'timestamp': '2025-09-30 22:16:28.507570', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.565008', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.21005704998970032, 'timestamp': '2025-09-30 22:16:28.568525', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:28.625917', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.14414392411708832, 'timestamp': '2025-09-30 22:16:28.633036', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:28.689174', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.1445990651845932, 'timestamp': '2025-09-30 22:16:28.693071', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:28.751225', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10238463431596756, 'timestamp': '2025-09-30 22:16:28.755113', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.820214', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.14871031045913696, 'timestamp': '2025-09-30 22:16:28.827676', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.884982', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.1504516750574112, 'timestamp': '2025-09-30 22:16:28.891920', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:28.948464', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.18213608860969543, 'timestamp': '2025-09-30 22:16:28.952249', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:29.009569', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.10584183037281036, 'timestamp': '2025-09-30 22:16:29.012638', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:29.075637', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.1975894719362259, 'timestamp': '2025-09-30 22:16:29.084320', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.152286', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.1639844924211502, 'timestamp': '2025-09-30 22:16:29.159725', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.217344', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.15581531822681427, 'timestamp': '2025-09-30 22:16:29.221026', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:29.291474', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.18678447604179382, 'timestamp': '2025-09-30 22:16:29.294972', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:29.354575', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.2180599719285965, 'timestamp': '2025-09-30 22:16:29.358321', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.415746', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.15332484245300293, 'timestamp': '2025-09-30 22:16:29.423369', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:29.480024', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.15595956146717072, 'timestamp': '2025-09-30 22:16:29.483708', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:29.545969', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.18534736335277557, 'timestamp': '2025-09-30 22:16:29.550426', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:29.607088', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.151661679148674, 'timestamp': '2025-09-30 22:16:29.609993', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.670555', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.2771167755126953, 'timestamp': '2025-09-30 22:16:29.677523', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:29.734736', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.14729547500610352, 'timestamp': '2025-09-30 22:16:29.744039', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:29.802011', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.14225026965141296, 'timestamp': '2025-09-30 22:16:29.805320', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.862295', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.1539176106452942, 'timestamp': '2025-09-30 22:16:29.865246', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:29.922480', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.19997116923332214, 'timestamp': '2025-09-30 22:16:29.934670', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:29.996059', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.15343445539474487, 'timestamp': '2025-09-30 22:16:29.998739', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:30.055223', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.23042811453342438, 'timestamp': '2025-09-30 22:16:30.058686', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:30.129934', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.2222415804862976, 'timestamp': '2025-09-30 22:16:30.135190', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:30.196062', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.1758386343717575, 'timestamp': '2025-09-30 22:16:30.203326', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.262900', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.14594344794750214, 'timestamp': '2025-09-30 22:16:30.266161', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.341543', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.12281842529773712, 'timestamp': '2025-09-30 22:16:30.345134', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:30.403515', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.18498992919921875, 'timestamp': '2025-09-30 22:16:30.423791', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:30.481187', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.13942337036132812, 'timestamp': '2025-09-30 22:16:30.493119', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.552248', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.20345506072044373, 'timestamp': '2025-09-30 22:16:30.555947', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:30.612843', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.1698305457830429, 'timestamp': '2025-09-30 22:16:30.615713', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:30.677072', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.22044195234775543, 'timestamp': '2025-09-30 22:16:30.686692', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.744907', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.15053515136241913, 'timestamp': '2025-09-30 22:16:30.752036', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.808041', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.07528860121965408, 'timestamp': '2025-09-30 22:16:30.826673', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:30.884144', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.21658514440059662, 'timestamp': '2025-09-30 22:16:30.887962', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:30.950061', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.13605935871601105, 'timestamp': '2025-09-30 22:16:30.952990', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:31.015016', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.338192343711853, 'timestamp': '2025-09-30 22:16:31.027630', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:16:31.085020', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.10168930888175964, 'timestamp': '2025-09-30 22:16:31.087747', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:31.145186', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.1924678087234497, 'timestamp': '2025-09-30 22:16:31.148573', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:31.210901', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.13135863840579987, 'timestamp': '2025-09-30 22:16:31.214641', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:31.271937', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.1317896693944931, 'timestamp': '2025-09-30 22:16:31.279261', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-30 22:16:31.707635', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:31.770873', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.21093036234378815, 'timestamp': '2025-09-30 22:16:31.774139', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:31.832120', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.12137582153081894, 'timestamp': '2025-09-30 22:16:31.835565', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:31.896872', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.11178155988454819, 'timestamp': '2025-09-30 22:16:31.900046', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:31.961982', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.18503320217132568, 'timestamp': '2025-09-30 22:16:31.969409', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:32.033115', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.14973892271518707, 'timestamp': '2025-09-30 22:16:32.036675', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:32.093988', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.15021981298923492, 'timestamp': '2025-09-30 22:16:32.100538', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.158009', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.1835702657699585, 'timestamp': '2025-09-30 22:16:32.167191', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:32.224524', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.15121598541736603, 'timestamp': '2025-09-30 22:16:32.232415', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:32.289843', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.16478757560253143, 'timestamp': '2025-09-30 22:16:32.293970', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.360592', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.1953595131635666, 'timestamp': '2025-09-30 22:16:32.364261', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.421219', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.24013829231262207, 'timestamp': '2025-09-30 22:16:32.424750', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:32.482152', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.1554638147354126, 'timestamp': '2025-09-30 22:16:32.489686', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:32.547058', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.17094074189662933, 'timestamp': '2025-09-30 22:16:32.550734', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:32.607910', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.1739998310804367, 'timestamp': '2025-09-30 22:16:32.613431', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.671076', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.2953356206417084, 'timestamp': '2025-09-30 22:16:32.684567', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.754499', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.14874309301376343, 'timestamp': '2025-09-30 22:16:32.762125', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:32.820332', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.15975716710090637, 'timestamp': '2025-09-30 22:16:32.824981', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:32.883485', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.14418499171733856, 'timestamp': '2025-09-30 22:16:32.900753', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:32.959984', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.21717509627342224, 'timestamp': '2025-09-30 22:16:32.974465', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:33.033326', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.2045750916004181, 'timestamp': '2025-09-30 22:16:33.040479', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:33.097414', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.2277034968137741, 'timestamp': '2025-09-30 22:16:33.100240', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:33.159061', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.1320427656173706, 'timestamp': '2025-09-30 22:16:33.162844', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:33.220956', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.213314026594162, 'timestamp': '2025-09-30 22:16:33.224152', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:33.284093', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.13097701966762543, 'timestamp': '2025-09-30 22:16:33.292207', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:16:33.349182', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.1307051032781601, 'timestamp': '2025-09-30 22:16:33.352857', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:33.411078', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.1717955619096756, 'timestamp': '2025-09-30 22:16:33.415718', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:33.474274', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.276962012052536, 'timestamp': '2025-09-30 22:16:33.477257', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:33.535247', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.23813483119010925, 'timestamp': '2025-09-30 22:16:33.542073', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:33.599528', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.13754713535308838, 'timestamp': '2025-09-30 22:16:33.609883', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:33.668142', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.1974450796842575, 'timestamp': '2025-09-30 22:16:33.674144', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:33.733712', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.2422126680612564, 'timestamp': '2025-09-30 22:16:33.738069', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:33.797053', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.11978194117546082, 'timestamp': '2025-09-30 22:16:33.804432', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:33.863543', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.2342238575220108, 'timestamp': '2025-09-30 22:16:33.868566', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:33.926594', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.20238257944583893, 'timestamp': '2025-09-30 22:16:33.931549', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:33.990019', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.13500703871250153, 'timestamp': '2025-09-30 22:16:33.993665', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:34.051720', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.18699292838573456, 'timestamp': '2025-09-30 22:16:34.060380', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:34.118142', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.17224200069904327, 'timestamp': '2025-09-30 22:16:34.123632', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:34.182032', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.0781475380063057, 'timestamp': '2025-09-30 22:16:34.192103', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:34.249408', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.2639261484146118, 'timestamp': '2025-09-30 22:16:34.254238', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:34.314271', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.248098686337471, 'timestamp': '2025-09-30 22:16:34.322190', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:34.380325', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.26449400186538696, 'timestamp': '2025-09-30 22:16:34.394382', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:34.451336', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.23468650877475739, 'timestamp': '2025-09-30 22:16:34.456861', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:34.522895', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.1845380961894989, 'timestamp': '2025-09-30 22:16:34.526061', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:34.584012', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.1254214644432068, 'timestamp': '2025-09-30 22:16:34.599484', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:34.667941', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.1991037130355835, 'timestamp': '2025-09-30 22:16:34.671853', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:34.730728', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.2431989163160324, 'timestamp': '2025-09-30 22:16:34.737027', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:34.796935', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.16122542321681976, 'timestamp': '2025-09-30 22:16:34.801474', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:34.859925', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.09848835319280624, 'timestamp': '2025-09-30 22:16:34.879935', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:34.936319', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.12964192032814026, 'timestamp': '2025-09-30 22:16:34.940556', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:34.998215', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.21509666740894318, 'timestamp': '2025-09-30 22:16:35.001867', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:35.060060', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.12365000694990158, 'timestamp': '2025-09-30 22:16:35.063637', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:35.120737', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.12400466948747635, 'timestamp': '2025-09-30 22:16:35.128277', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:35.185182', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.1232297420501709, 'timestamp': '2025-09-30 22:16:35.188962', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:35.246442', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.10902515053749084, 'timestamp': '2025-09-30 22:16:35.251383', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:35.307999', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.16706505417823792, 'timestamp': '2025-09-30 22:16:35.313998', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:35.381743', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.18456724286079407, 'timestamp': '2025-09-30 22:16:35.388245', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:35.445736', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.15861424803733826, 'timestamp': '2025-09-30 22:16:35.449586', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:35.507941', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.16456836462020874, 'timestamp': '2025-09-30 22:16:35.515152', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:35.576246', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.266931414604187, 'timestamp': '2025-09-30 22:16:35.582086', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:35.640108', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.09117387235164642, 'timestamp': '2025-09-30 22:16:35.648349', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:35.708433', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.2385997474193573, 'timestamp': '2025-09-30 22:16:35.716246', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:35.773632', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.1876431405544281, 'timestamp': '2025-09-30 22:16:35.778115', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:35.834987', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.12443116307258606, 'timestamp': '2025-09-30 22:16:35.839105', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:35.896250', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.1861570030450821, 'timestamp': '2025-09-30 22:16:35.903114', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:35.959701', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.1413320153951645, 'timestamp': '2025-09-30 22:16:35.963102', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:36.029030', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.24090829491615295, 'timestamp': '2025-09-30 22:16:36.040051', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:36.107320', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.088912233710289, 'timestamp': '2025-09-30 22:16:36.113384', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:36.169829', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.11956548690795898, 'timestamp': '2025-09-30 22:16:36.176833', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:36.244001', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.2134222537279129, 'timestamp': '2025-09-30 22:16:36.248112', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:36.306962', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.19544179737567902, 'timestamp': '2025-09-30 22:16:36.314766', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:36.372560', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.22024357318878174, 'timestamp': '2025-09-30 22:16:36.375619', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:36.440828', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.16734333336353302, 'timestamp': '2025-09-30 22:16:36.449229', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:36.507391', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.14922241866588593, 'timestamp': '2025-09-30 22:16:36.511952', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:36.571205', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.23308812081813812, 'timestamp': '2025-09-30 22:16:36.575508', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:36.632904', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.17339207231998444, 'timestamp': '2025-09-30 22:16:36.646341', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:36.712287', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.21582293510437012, 'timestamp': '2025-09-30 22:16:36.721409', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:36.788742', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.23800987005233765, 'timestamp': '2025-09-30 22:16:36.791891', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:36.848920', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.16955362260341644, 'timestamp': '2025-09-30 22:16:36.856555', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:36.913920', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.0851447582244873, 'timestamp': '2025-09-30 22:16:36.917412', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:36.974248', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.0959586352109909, 'timestamp': '2025-09-30 22:16:36.984577', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.043351', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.11598537117242813, 'timestamp': '2025-09-30 22:16:37.045975', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:37.101773', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.14402525126934052, 'timestamp': '2025-09-30 22:16:37.107257', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:37.165134', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.18115298449993134, 'timestamp': '2025-09-30 22:16:37.168499', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:37.225863', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.1318608522415161, 'timestamp': '2025-09-30 22:16:37.232259', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:37.287994', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.21809343993663788, 'timestamp': '2025-09-30 22:16:37.290499', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.362883', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.2970544099807739, 'timestamp': '2025-09-30 22:16:37.365946', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:37.422630', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.14244961738586426, 'timestamp': '2025-09-30 22:16:37.425490', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:37.482998', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.13839444518089294, 'timestamp': '2025-09-30 22:16:37.489623', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.546981', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.1644701361656189, 'timestamp': '2025-09-30 22:16:37.551226', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.610414', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.20880331099033356, 'timestamp': '2025-09-30 22:16:37.614515', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.671725', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.11471353471279144, 'timestamp': '2025-09-30 22:16:37.675446', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:37.733426', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.13939431309700012, 'timestamp': '2025-09-30 22:16:37.740375', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:37.801585', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.16361549496650696, 'timestamp': '2025-09-30 22:16:37.808308', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:37.867837', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.18799081444740295, 'timestamp': '2025-09-30 22:16:37.871854', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:37.928652', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.16889430582523346, 'timestamp': '2025-09-30 22:16:37.932344', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:37.988532', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.12718644738197327, 'timestamp': '2025-09-30 22:16:37.995023', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:38.051679', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.21587032079696655, 'timestamp': '2025-09-30 22:16:38.059660', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:38.117606', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.13343411684036255, 'timestamp': '2025-09-30 22:16:38.126786', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.183324', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.13571952283382416, 'timestamp': '2025-09-30 22:16:38.186623', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:38.243009', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.22035476565361023, 'timestamp': '2025-09-30 22:16:38.249783', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.305294', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.1897697001695633, 'timestamp': '2025-09-30 22:16:38.308131', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.364352', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.1239352896809578, 'timestamp': '2025-09-30 22:16:38.367963', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:38.426043', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.11904103308916092, 'timestamp': '2025-09-30 22:16:38.428997', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:38.485439', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.1613684892654419, 'timestamp': '2025-09-30 22:16:38.491897', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.557572', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.10344783961772919, 'timestamp': '2025-09-30 22:16:38.561252', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:38.621415', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.16907532513141632, 'timestamp': '2025-09-30 22:16:38.625279', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.682707', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.25776976346969604, 'timestamp': '2025-09-30 22:16:38.686191', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.743865', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.2568068206310272, 'timestamp': '2025-09-30 22:16:38.751780', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.808505', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.17689236998558044, 'timestamp': '2025-09-30 22:16:38.827903', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:38.884275', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.140827015042305, 'timestamp': '2025-09-30 22:16:38.890933', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:38.948958', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.23670856654644012, 'timestamp': '2025-09-30 22:16:38.952265', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:39.009717', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.25139957666397095, 'timestamp': '2025-09-30 22:16:39.019498', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:39.080527', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.1355268806219101, 'timestamp': '2025-09-30 22:16:39.084569', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:39.155458', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.20797903835773468, 'timestamp': '2025-09-30 22:16:39.158788', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:39.216893', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.08592219650745392, 'timestamp': '2025-09-30 22:16:39.220939', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:39.277899', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.19510839879512787, 'timestamp': '2025-09-30 22:16:39.293939', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:39.350728', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.11993438750505447, 'timestamp': '2025-09-30 22:16:39.366657', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:39.425229', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.16996565461158752, 'timestamp': '2025-09-30 22:16:39.430179', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:39.496083', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.1924530565738678, 'timestamp': '2025-09-30 22:16:39.500284', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:39.558204', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.211986243724823, 'timestamp': '2025-09-30 22:16:39.566334', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:39.623008', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.1773141473531723, 'timestamp': '2025-09-30 22:16:39.626216', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:39.682478', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.1390259563922882, 'timestamp': '2025-09-30 22:16:39.685162', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:39.746560', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.13785500824451447, 'timestamp': '2025-09-30 22:16:39.763922', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:39.825596', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.27825093269348145, 'timestamp': '2025-09-30 22:16:39.832524', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:39.889268', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.19535070657730103, 'timestamp': '2025-09-30 22:16:39.892301', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:39.954578', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.08801843971014023, 'timestamp': '2025-09-30 22:16:39.958834', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:40.019080', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.12855719029903412, 'timestamp': '2025-09-30 22:16:40.022676', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:40.083052', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.15205664932727814, 'timestamp': '2025-09-30 22:16:40.089368', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:40.149831', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.24500365555286407, 'timestamp': '2025-09-30 22:16:40.152879', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:40.216735', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.1935494989156723, 'timestamp': '2025-09-30 22:16:40.223277', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:40.281754', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.18727989494800568, 'timestamp': '2025-09-30 22:16:40.289519', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:40.347090', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.20042912662029266, 'timestamp': '2025-09-30 22:16:40.360979', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:40.419368', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.2661444842815399, 'timestamp': '2025-09-30 22:16:40.427910', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:40.488175', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.17023587226867676, 'timestamp': '2025-09-30 22:16:40.491587', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:40.558753', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.10580883920192719, 'timestamp': '2025-09-30 22:16:40.563240', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:40.622185', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.23304228484630585, 'timestamp': '2025-09-30 22:16:40.628598', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:40.691350', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.17374078929424286, 'timestamp': '2025-09-30 22:16:40.695223', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:40.761184', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.1520528346300125, 'timestamp': '2025-09-30 22:16:40.763773', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:40.826925', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.18812915682792664, 'timestamp': '2025-09-30 22:16:40.832632', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:40.889637', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.19563128054141998, 'timestamp': '2025-09-30 22:16:40.897069', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:40.958048', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.2459261119365692, 'timestamp': '2025-09-30 22:16:40.960948', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:41.025740', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.18868376314640045, 'timestamp': '2025-09-30 22:16:41.029111', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:41.086242', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.160834401845932, 'timestamp': '2025-09-30 22:16:41.089568', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:41.147677', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.12157515436410904, 'timestamp': '2025-09-30 22:16:41.159562', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:41.219054', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.12346713244915009, 'timestamp': '2025-09-30 22:16:41.222436', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:41.287267', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.1385822594165802, 'timestamp': '2025-09-30 22:16:41.290875', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:41.347094', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.15928484499454498, 'timestamp': '2025-09-30 22:16:41.351184', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:41.408755', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.2594430446624756, 'timestamp': '2025-09-30 22:16:41.418865', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:41.478707', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.1930524855852127, 'timestamp': '2025-09-30 22:16:41.482971', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:41.553404', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.282601535320282, 'timestamp': '2025-09-30 22:16:41.557143', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:41.613786', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.14999480545520782, 'timestamp': '2025-09-30 22:16:41.621025', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-30 22:16:41.697044', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.12066634744405746, 'timestamp': '2025-09-30 22:16:41.703761', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:41.760765', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.13677175343036652, 'timestamp': '2025-09-30 22:16:41.763472', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:41.821592', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.13798688352108002, 'timestamp': '2025-09-30 22:16:41.826145', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:41.885182', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.1325707733631134, 'timestamp': '2025-09-30 22:16:41.888883', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:41.947589', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.1549612581729889, 'timestamp': '2025-09-30 22:16:41.954712', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:42.012653', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.19551925361156464, 'timestamp': '2025-09-30 22:16:42.015940', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.079799', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.16522696614265442, 'timestamp': '2025-09-30 22:16:42.082571', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:42.140829', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.15132540464401245, 'timestamp': '2025-09-30 22:16:42.144932', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:42.202215', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.3204151391983032, 'timestamp': '2025-09-30 22:16:42.216211', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.274387', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.1756543517112732, 'timestamp': '2025-09-30 22:16:42.281432', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:42.340249', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.21022219955921173, 'timestamp': '2025-09-30 22:16:42.348533', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.413466', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.27981144189834595, 'timestamp': '2025-09-30 22:16:42.420363', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.482470', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.16064976155757904, 'timestamp': '2025-09-30 22:16:42.488966', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:42.545511', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.15544156730175018, 'timestamp': '2025-09-30 22:16:42.552850', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:42.614546', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.2058178335428238, 'timestamp': '2025-09-30 22:16:42.617492', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:42.675880', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.201987162232399, 'timestamp': '2025-09-30 22:16:42.679998', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.741800', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.16088947653770447, 'timestamp': '2025-09-30 22:16:42.748800', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:42.809254', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.13965332508087158, 'timestamp': '2025-09-30 22:16:42.813639', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:42.878950', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.16058462858200073, 'timestamp': '2025-09-30 22:16:42.883726', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:42.941618', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.1725642830133438, 'timestamp': '2025-09-30 22:16:42.945492', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:43.004052', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.2625470757484436, 'timestamp': '2025-09-30 22:16:43.011251', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.073577', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.11729437857866287, 'timestamp': '2025-09-30 22:16:43.076406', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:43.134367', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.1660538911819458, 'timestamp': '2025-09-30 22:16:43.142338', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.204956', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.28355076909065247, 'timestamp': '2025-09-30 22:16:43.217738', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:43.275825', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.08559907227754593, 'timestamp': '2025-09-30 22:16:43.289294', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:43.360874', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.18722254037857056, 'timestamp': '2025-09-30 22:16:43.364496', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:43.422265', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.13897843658924103, 'timestamp': '2025-09-30 22:16:43.425040', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.489639', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.142809197306633, 'timestamp': '2025-09-30 22:16:43.498750', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.561204', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.21265725791454315, 'timestamp': '2025-09-30 22:16:43.567250', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:16:43.624510', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.17166152596473694, 'timestamp': '2025-09-30 22:16:43.634267', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.699072', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.13071051239967346, 'timestamp': '2025-09-30 22:16:43.709541', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.773627', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.17236784100532532, 'timestamp': '2025-09-30 22:16:43.776159', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.840176', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.16231173276901245, 'timestamp': '2025-09-30 22:16:43.851028', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:43.910218', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.1326330006122589, 'timestamp': '2025-09-30 22:16:43.913281', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:43.978659', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.2946122884750366, 'timestamp': '2025-09-30 22:16:43.990772', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:44.049368', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.20120568573474884, 'timestamp': '2025-09-30 22:16:44.058839', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.128195', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.23467156291007996, 'timestamp': '2025-09-30 22:16:44.138912', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:44.198267', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.18678690493106842, 'timestamp': '2025-09-30 22:16:44.207217', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:44.267321', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.23834949731826782, 'timestamp': '2025-09-30 22:16:44.271234', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:44.329337', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.17128048837184906, 'timestamp': '2025-09-30 22:16:44.332910', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:44.398044', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.13773736357688904, 'timestamp': '2025-09-30 22:16:44.407150', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:44.469706', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.16342739760875702, 'timestamp': '2025-09-30 22:16:44.476926', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.540668', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.25735023617744446, 'timestamp': '2025-09-30 22:16:44.543860', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.607546', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.2558828294277191, 'timestamp': '2025-09-30 22:16:44.613639', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.676377', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.12365971505641937, 'timestamp': '2025-09-30 22:16:44.682498', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:16:44.739343', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.08061418682336807, 'timestamp': '2025-09-30 22:16:44.746811', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.805292', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.20310135185718536, 'timestamp': '2025-09-30 22:16:44.822948', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:44.880245', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.1270352154970169, 'timestamp': '2025-09-30 22:16:44.883143', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:44.951864', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.08655757457017899, 'timestamp': '2025-09-30 22:16:44.957944', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:45.014485', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.206087127327919, 'timestamp': '2025-09-30 22:16:45.020513', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.077234', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.15762989223003387, 'timestamp': '2025-09-30 22:16:45.079898', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:45.141550', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.20130445063114166, 'timestamp': '2025-09-30 22:16:45.145546', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:45.206142', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.24934297800064087, 'timestamp': '2025-09-30 22:16:45.216058', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:45.275802', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.1472817212343216, 'timestamp': '2025-09-30 22:16:45.278744', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.339288', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.13216181099414825, 'timestamp': '2025-09-30 22:16:45.351557', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.408087', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.21454650163650513, 'timestamp': '2025-09-30 22:16:45.410758', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.467585', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.2040550708770752, 'timestamp': '2025-09-30 22:16:45.477061', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:45.534732', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.15828529000282288, 'timestamp': '2025-09-30 22:16:45.547744', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:16:45.612040', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.13642211258411407, 'timestamp': '2025-09-30 22:16:45.617220', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:45.676373', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.23789171874523163, 'timestamp': '2025-09-30 22:16:45.686476', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:45.744573', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.1402605175971985, 'timestamp': '2025-09-30 22:16:45.756861', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.813389', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.24636274576187134, 'timestamp': '2025-09-30 22:16:45.825237', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:45.888240', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.15962211787700653, 'timestamp': '2025-09-30 22:16:45.891186', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:16:45.950000', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.23459136486053467, 'timestamp': '2025-09-30 22:16:45.955178', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:46.017714', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.28220728039741516, 'timestamp': '2025-09-30 22:16:46.023385', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:46.082203', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.13629038631916046, 'timestamp': '2025-09-30 22:16:46.084909', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.145139', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.19386127591133118, 'timestamp': '2025-09-30 22:16:46.155448', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:46.224391', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.16836154460906982, 'timestamp': '2025-09-30 22:16:46.230982', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:46.308966', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.1386217176914215, 'timestamp': '2025-09-30 22:16:46.321719', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:16:46.403192', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.17517541348934174, 'timestamp': '2025-09-30 22:16:46.408027', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.486165', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.11711294203996658, 'timestamp': '2025-09-30 22:16:46.491942', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.569110', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.190414160490036, 'timestamp': '2025-09-30 22:16:46.572448', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.645454', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.20834550261497498, 'timestamp': '2025-09-30 22:16:46.652803', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.709616', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.141411691904068, 'timestamp': '2025-09-30 22:16:46.713023', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:16:46.777677', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.17504063248634338, 'timestamp': '2025-09-30 22:16:46.781008', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:16:46.845201', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.16421625018119812, 'timestamp': '2025-09-30 22:16:46.852735', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:16:46.915134', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.15103071928024292, 'timestamp': '2025-09-30 22:16:46.922598', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:17:00.408834', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 11083.691784357246, 'timestamp': '2025-09-30 22:17:00.429521', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:00.511842', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.12452376633882523, 'timestamp': '2025-09-30 22:17:00.540635', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:00.634781', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.19448968768119812, 'timestamp': '2025-09-30 22:17:00.661253', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:00.743845', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.17618700861930847, 'timestamp': '2025-09-30 22:17:00.763721', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:00.837079', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.23445676267147064, 'timestamp': '2025-09-30 22:17:00.869506', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:00.937350', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.1819206327199936, 'timestamp': '2025-09-30 22:17:00.950077', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:01.013433', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.21102827787399292, 'timestamp': '2025-09-30 22:17:01.026443', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:01.100646', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.19105854630470276, 'timestamp': '2025-09-30 22:17:01.117578', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:01.188077', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.22904974222183228, 'timestamp': '2025-09-30 22:17:01.207424', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:01.270893', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.15995904803276062, 'timestamp': '2025-09-30 22:17:01.279906', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:01.352635', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.0911761075258255, 'timestamp': '2025-09-30 22:17:01.372021', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:01.442092', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.17109574377536774, 'timestamp': '2025-09-30 22:17:01.452333', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:01.517239', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.19482329487800598, 'timestamp': '2025-09-30 22:17:01.530820', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:01.592560', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.20962458848953247, 'timestamp': '2025-09-30 22:17:01.602686', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:01.665641', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.10853271931409836, 'timestamp': '2025-09-30 22:17:01.672193', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:01.735928', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.1632194221019745, 'timestamp': '2025-09-30 22:17:01.747897', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:01.807037', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.20985905826091766, 'timestamp': '2025-09-30 22:17:01.835458', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:01.907972', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.1623995155096054, 'timestamp': '2025-09-30 22:17:01.933630', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:02.001327', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.12981976568698883, 'timestamp': '2025-09-30 22:17:02.010569', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:02.068064', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.13882917165756226, 'timestamp': '2025-09-30 22:17:02.071030', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:02.129435', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.15933330357074738, 'timestamp': '2025-09-30 22:17:02.135535', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:02.196367', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.23553240299224854, 'timestamp': '2025-09-30 22:17:02.199864', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:02.256340', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.185183584690094, 'timestamp': '2025-09-30 22:17:02.259452', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:02.316042', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.1829962134361267, 'timestamp': '2025-09-30 22:17:02.320478', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:02.379393', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.10062170028686523, 'timestamp': '2025-09-30 22:17:02.386942', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:02.442739', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.19813378155231476, 'timestamp': '2025-09-30 22:17:02.452759', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:02.509388', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.13482137024402618, 'timestamp': '2025-09-30 22:17:02.512421', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:02.569429', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.13819460570812225, 'timestamp': '2025-09-30 22:17:02.572508', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:02.634215', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.20518936216831207, 'timestamp': '2025-09-30 22:17:02.645920', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:02.702230', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.1574043333530426, 'timestamp': '2025-09-30 22:17:02.708228', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:17:02.767250', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.2563977837562561, 'timestamp': '2025-09-30 22:17:02.778735', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:02.835552', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.11401405185461044, 'timestamp': '2025-09-30 22:17:02.839706', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:02.897203', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.0906379371881485, 'timestamp': '2025-09-30 22:17:02.904849', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:02.960995', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.13812072575092316, 'timestamp': '2025-09-30 22:17:02.971918', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:03.032595', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.12236153334379196, 'timestamp': '2025-09-30 22:17:03.035499', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.092282', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.13460110127925873, 'timestamp': '2025-09-30 22:17:03.102546', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:03.159278', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.14814668893814087, 'timestamp': '2025-09-30 22:17:03.165806', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:03.224130', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.10697072744369507, 'timestamp': '2025-09-30 22:17:03.228453', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:03.287173', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.08109352737665176, 'timestamp': '2025-09-30 22:17:03.290130', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.348290', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.12931609153747559, 'timestamp': '2025-09-30 22:17:03.351360', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:03.408684', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.1887311041355133, 'timestamp': '2025-09-30 22:17:03.415410', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:03.478912', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.19400933384895325, 'timestamp': '2025-09-30 22:17:03.481902', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:03.541867', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.12717466056346893, 'timestamp': '2025-09-30 22:17:03.546006', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.603289', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.14211992919445038, 'timestamp': '2025-09-30 22:17:03.607037', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.664825', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.12660308182239532, 'timestamp': '2025-09-30 22:17:03.671345', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.728399', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.175893172621727, 'timestamp': '2025-09-30 22:17:03.732396', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:03.790557', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.22319354116916656, 'timestamp': '2025-09-30 22:17:03.795136', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:03.854053', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.06552185118198395, 'timestamp': '2025-09-30 22:17:03.857873', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:03.914862', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.1639474332332611, 'timestamp': '2025-09-30 22:17:03.922665', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:03.980579', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.20519867539405823, 'timestamp': '2025-09-30 22:17:03.984149', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:04.055217', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.20478422939777374, 'timestamp': '2025-09-30 22:17:04.059999', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:04.116860', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.14641255140304565, 'timestamp': '2025-09-30 22:17:04.124778', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:04.183224', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.13938158750534058, 'timestamp': '2025-09-30 22:17:04.190014', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:04.246443', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.14246487617492676, 'timestamp': '2025-09-30 22:17:04.249894', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:04.306438', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.10106074064970016, 'timestamp': '2025-09-30 22:17:04.309906', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:04.372513', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.2344525009393692, 'timestamp': '2025-09-30 22:17:04.377140', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:04.444604', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.13124026358127594, 'timestamp': '2025-09-30 22:17:04.451856', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:04.508802', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.2072746902704239, 'timestamp': '2025-09-30 22:17:04.516234', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:04.573675', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.10019873827695847, 'timestamp': '2025-09-30 22:17:04.577043', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:04.640649', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.1449543833732605, 'timestamp': '2025-09-30 22:17:04.645339', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:17:04.704506', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.1658736914396286, 'timestamp': '2025-09-30 22:17:04.712705', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:04.769740', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.20781390368938446, 'timestamp': '2025-09-30 22:17:04.772644', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:04.830231', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.1711764633655548, 'timestamp': '2025-09-30 22:17:04.833281', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:04.896141', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.15919695794582367, 'timestamp': '2025-09-30 22:17:04.899226', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:04.957615', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.16833113133907318, 'timestamp': '2025-09-30 22:17:04.964311', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:05.020167', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.25579944252967834, 'timestamp': '2025-09-30 22:17:05.022922', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:05.080149', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.19783636927604675, 'timestamp': '2025-09-30 22:17:05.083288', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:05.140628', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.2154368758201599, 'timestamp': '2025-09-30 22:17:05.143264', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.200727', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.17901702225208282, 'timestamp': '2025-09-30 22:17:05.207834', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:05.264571', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.07294387370347977, 'timestamp': '2025-09-30 22:17:05.268026', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:05.327188', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.16032683849334717, 'timestamp': '2025-09-30 22:17:05.330476', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.387209', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.28830817341804504, 'timestamp': '2025-09-30 22:17:05.390765', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.448519', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.05034439265727997, 'timestamp': '2025-09-30 22:17:05.456105', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:05.513836', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.11542419344186783, 'timestamp': '2025-09-30 22:17:05.517101', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:05.574196', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.16118566691875458, 'timestamp': '2025-09-30 22:17:05.578612', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:05.638069', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.11927835643291473, 'timestamp': '2025-09-30 22:17:05.641377', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.701555', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.18850992619991302, 'timestamp': '2025-09-30 22:17:05.708394', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.769220', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.15302294492721558, 'timestamp': '2025-09-30 22:17:05.772473', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.830932', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.11856476217508316, 'timestamp': '2025-09-30 22:17:05.834045', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:05.890393', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.24402114748954773, 'timestamp': '2025-09-30 22:17:05.893842', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:05.950653', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.12695546448230743, 'timestamp': '2025-09-30 22:17:05.957286', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.020212', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.15219227969646454, 'timestamp': '2025-09-30 22:17:06.023917', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:06.080585', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.19656644761562347, 'timestamp': '2025-09-30 22:17:06.083758', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:06.139941', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.16209745407104492, 'timestamp': '2025-09-30 22:17:06.142520', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.200167', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.1926386058330536, 'timestamp': '2025-09-30 22:17:06.206908', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:06.264529', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16343536972999573, 'timestamp': '2025-09-30 22:17:06.269528', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.339020', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.13690999150276184, 'timestamp': '2025-09-30 22:17:06.342490', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.400891', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.11974282562732697, 'timestamp': '2025-09-30 22:17:06.404008', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:06.461579', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.2062116414308548, 'timestamp': '2025-09-30 22:17:06.467905', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:06.524245', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.17614614963531494, 'timestamp': '2025-09-30 22:17:06.528259', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:06.591741', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.1114412322640419, 'timestamp': '2025-09-30 22:17:06.595119', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:06.651304', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.1739480048418045, 'timestamp': '2025-09-30 22:17:06.656649', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.719986', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.14744766056537628, 'timestamp': '2025-09-30 22:17:06.730705', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.789708', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.12779705226421356, 'timestamp': '2025-09-30 22:17:06.793187', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.853883', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.18917496502399445, 'timestamp': '2025-09-30 22:17:06.857571', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.915215', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.10252740979194641, 'timestamp': '2025-09-30 22:17:06.921295', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:06.979742', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.2064761072397232, 'timestamp': '2025-09-30 22:17:06.995991', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.053574', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.12448261678218842, 'timestamp': '2025-09-30 22:17:07.062546', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:07.130187', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.18961137533187866, 'timestamp': '2025-09-30 22:17:07.133352', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:07.190190', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.14450114965438843, 'timestamp': '2025-09-30 22:17:07.193072', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.249155', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.21073424816131592, 'timestamp': '2025-09-30 22:17:07.256133', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:07.315146', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.15699663758277893, 'timestamp': '2025-09-30 22:17:07.320783', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.376961', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.11510851979255676, 'timestamp': '2025-09-30 22:17:07.380003', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.439549', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.15575318038463593, 'timestamp': '2025-09-30 22:17:07.442728', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:07.498818', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.20643553137779236, 'timestamp': '2025-09-30 22:17:07.505664', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.561183', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.24748346209526062, 'timestamp': '2025-09-30 22:17:07.565000', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.622514', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.20403511822223663, 'timestamp': '2025-09-30 22:17:07.630397', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:07.690811', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.2467755675315857, 'timestamp': '2025-09-30 22:17:07.694412', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:07.751152', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.15645965933799744, 'timestamp': '2025-09-30 22:17:07.757812', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.813834', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.20910869538784027, 'timestamp': '2025-09-30 22:17:07.822992', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:07.879478', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.13678182661533356, 'timestamp': '2025-09-30 22:17:07.882452', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.938026', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.16970305144786835, 'timestamp': '2025-09-30 22:17:07.941018', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:07.997349', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.1707683503627777, 'timestamp': '2025-09-30 22:17:08.003807', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:08.059281', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.17522244155406952, 'timestamp': '2025-09-30 22:17:08.067010', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.129272', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.2407991737127304, 'timestamp': '2025-09-30 22:17:08.132437', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:08.189058', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.20345723628997803, 'timestamp': '2025-09-30 22:17:08.192140', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:08.248877', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.20104745030403137, 'timestamp': '2025-09-30 22:17:08.256155', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.321380', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.2590937912464142, 'timestamp': '2025-09-30 22:17:08.334338', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:08.390905', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.1860693395137787, 'timestamp': '2025-09-30 22:17:08.393619', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:08.453103', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.27755337953567505, 'timestamp': '2025-09-30 22:17:08.456425', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.516772', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2266845703125, 'timestamp': '2025-09-30 22:17:08.523313', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.583705', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.20923250913619995, 'timestamp': '2025-09-30 22:17:08.596107', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.664227', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.16036970913410187, 'timestamp': '2025-09-30 22:17:08.667146', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:08.724663', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.19804616272449493, 'timestamp': '2025-09-30 22:17:08.727979', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:08.788990', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.21475622057914734, 'timestamp': '2025-09-30 22:17:08.795511', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:08.851162', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.18580937385559082, 'timestamp': '2025-09-30 22:17:08.854069', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:08.911533', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.1544746458530426, 'timestamp': '2025-09-30 22:17:08.914910', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:08.971266', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.1641632616519928, 'timestamp': '2025-09-30 22:17:08.974670', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.031544', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.17772062122821808, 'timestamp': '2025-09-30 22:17:09.043585', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.100086', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.16880199313163757, 'timestamp': '2025-09-30 22:17:09.114900', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.173348', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.18585768342018127, 'timestamp': '2025-09-30 22:17:09.182180', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:09.240676', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.12305424362421036, 'timestamp': '2025-09-30 22:17:09.244082', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:09.304588', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.10256169736385345, 'timestamp': '2025-09-30 22:17:09.319538', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.380117', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.2718910574913025, 'timestamp': '2025-09-30 22:17:09.386785', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.442760', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.1529831439256668, 'timestamp': '2025-09-30 22:17:09.447180', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:09.504051', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.15880246460437775, 'timestamp': '2025-09-30 22:17:09.510125', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:09.566847', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.0998791754245758, 'timestamp': '2025-09-30 22:17:09.579173', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:09.635590', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.19879895448684692, 'timestamp': '2025-09-30 22:17:09.638385', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:09.700527', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.25966301560401917, 'timestamp': '2025-09-30 22:17:09.715667', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.773727', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.17599277198314667, 'timestamp': '2025-09-30 22:17:09.776780', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:09.833576', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.11094621568918228, 'timestamp': '2025-09-30 22:17:09.840960', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:09.904063', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.174947127699852, 'timestamp': '2025-09-30 22:17:09.917243', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:09.984561', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.18018777668476105, 'timestamp': '2025-09-30 22:17:09.988317', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:10.056126', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.11699062585830688, 'timestamp': '2025-09-30 22:17:10.069899', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:10.128279', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.26775214076042175, 'timestamp': '2025-09-30 22:17:10.136667', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:10.194116', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.24543660879135132, 'timestamp': '2025-09-30 22:17:10.196901', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:10.253601', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.1340547353029251, 'timestamp': '2025-09-30 22:17:10.266378', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:10.330869', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.09472605586051941, 'timestamp': '2025-09-30 22:17:10.342558', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:10.411675', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.1393953412771225, 'timestamp': '2025-09-30 22:17:10.419957', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:10.477044', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.12402123957872391, 'timestamp': '2025-09-30 22:17:10.480497', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:10.538111', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.10016841441392899, 'timestamp': '2025-09-30 22:17:10.541857', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:10.605524', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.21386386454105377, 'timestamp': '2025-09-30 22:17:10.614580', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:10.671105', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.1182173639535904, 'timestamp': '2025-09-30 22:17:10.678617', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:10.747202', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.20101295411586761, 'timestamp': '2025-09-30 22:17:10.751015', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:10.807639', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.18371103703975677, 'timestamp': '2025-09-30 22:17:10.810461', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:10.867073', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.1257994920015335, 'timestamp': '2025-09-30 22:17:10.871161', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:10.927914', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.1867363452911377, 'timestamp': '2025-09-30 22:17:10.935763', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.004831', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.22729560732841492, 'timestamp': '2025-09-30 22:17:11.009584', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.081307', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.1487751454114914, 'timestamp': '2025-09-30 22:17:11.086059', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:11.153620', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.13133586943149567, 'timestamp': '2025-09-30 22:17:11.157434', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.213392', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.15117685496807098, 'timestamp': '2025-09-30 22:17:11.232172', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:11.289478', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.19550731778144836, 'timestamp': '2025-09-30 22:17:11.293822', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:11.351575', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.2065991759300232, 'timestamp': '2025-09-30 22:17:11.355409', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.417200', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.17232124507427216, 'timestamp': '2025-09-30 22:17:11.420556', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:11.477930', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.18922294676303864, 'timestamp': '2025-09-30 22:17:11.485947', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.547352', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.15478502213954926, 'timestamp': '2025-09-30 22:17:11.556466', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:11.616364', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.17488083243370056, 'timestamp': '2025-09-30 22:17:11.629930', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:11.688876', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.1600552499294281, 'timestamp': '2025-09-30 22:17:11.693127', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:11.750656', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14785021543502808, 'timestamp': '2025-09-30 22:17:11.759051', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:11.817060', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.12830297648906708, 'timestamp': '2025-09-30 22:17:11.820421', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:11.882658', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.17819911241531372, 'timestamp': '2025-09-30 22:17:11.895516', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:11.958690', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.24579831957817078, 'timestamp': '2025-09-30 22:17:11.973886', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:12.037183', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.1816335767507553, 'timestamp': '2025-09-30 22:17:12.045203', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:12.110062', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.20092444121837616, 'timestamp': '2025-09-30 22:17:12.114303', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:12.171897', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.11851435899734497, 'timestamp': '2025-09-30 22:17:12.177379', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:12.239948', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.15405011177062988, 'timestamp': '2025-09-30 22:17:12.243020', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:12.300122', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.15254515409469604, 'timestamp': '2025-09-30 22:17:12.310618', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:12.371049', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.17534402012825012, 'timestamp': '2025-09-30 22:17:12.387383', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:12.463496', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.13077962398529053, 'timestamp': '2025-09-30 22:17:12.467023', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:12.524758', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.17807479202747345, 'timestamp': '2025-09-30 22:17:12.527995', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:12.594958', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.11879109591245651, 'timestamp': '2025-09-30 22:17:12.601463', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:12.663755', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.18525190651416779, 'timestamp': '2025-09-30 22:17:12.667208', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:12.733084', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.18661020696163177, 'timestamp': '2025-09-30 22:17:12.742496', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:12.801122', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.1591920256614685, 'timestamp': '2025-09-30 22:17:12.804369', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:12.860955', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.13191767036914825, 'timestamp': '2025-09-30 22:17:12.872457', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:12.929349', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.14246299862861633, 'timestamp': '2025-09-30 22:17:12.931889', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:12.987925', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.13725632429122925, 'timestamp': '2025-09-30 22:17:12.992166', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.062446', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.19509172439575195, 'timestamp': '2025-09-30 22:17:13.069605', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:13.127185', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10821639746427536, 'timestamp': '2025-09-30 22:17:13.134349', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:13.191229', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.20595332980155945, 'timestamp': '2025-09-30 22:17:13.195639', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:13.255631', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.2687918245792389, 'timestamp': '2025-09-30 22:17:13.260699', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.336214', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.2500414252281189, 'timestamp': '2025-09-30 22:17:13.339193', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:13.400430', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.2138916552066803, 'timestamp': '2025-09-30 22:17:13.407569', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:13.465017', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.1274440586566925, 'timestamp': '2025-09-30 22:17:13.474404', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.548461', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.1842930018901825, 'timestamp': '2025-09-30 22:17:13.551173', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:13.609330', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.15104930102825165, 'timestamp': '2025-09-30 22:17:13.612144', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:13.669402', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.13918694853782654, 'timestamp': '2025-09-30 22:17:13.682151', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.739837', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.12602493166923523, 'timestamp': '2025-09-30 22:17:13.746331', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.803147', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.1939975917339325, 'timestamp': '2025-09-30 22:17:13.806304', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:13.864427', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.14211520552635193, 'timestamp': '2025-09-30 22:17:13.867696', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.933931', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.18575067818164825, 'timestamp': '2025-09-30 22:17:13.940981', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:13.996687', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.12312085181474686, 'timestamp': '2025-09-30 22:17:14.013265', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:14.078703', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.12346722930669785, 'timestamp': '2025-09-30 22:17:14.083066', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:14.150755', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.2476789355278015, 'timestamp': '2025-09-30 22:17:14.153389', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:14.210520', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.16409507393836975, 'timestamp': '2025-09-30 22:17:14.217522', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:14.277063', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.1759197860956192, 'timestamp': '2025-09-30 22:17:14.280446', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:14.337517', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.1477823704481125, 'timestamp': '2025-09-30 22:17:14.340223', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:14.408454', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.2144869714975357, 'timestamp': '2025-09-30 22:17:14.411037', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:14.468772', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.10371546447277069, 'timestamp': '2025-09-30 22:17:14.475638', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:14.531405', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.16974909603595734, 'timestamp': '2025-09-30 22:17:14.534645', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:14.591541', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.15679123997688293, 'timestamp': '2025-09-30 22:17:14.594390', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:14.650956', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.1799239218235016, 'timestamp': '2025-09-30 22:17:14.653663', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:14.710798', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.191072478890419, 'timestamp': '2025-09-30 22:17:14.717255', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:14.773010', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.2033502757549286, 'timestamp': '2025-09-30 22:17:14.778763', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:14.838008', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.2226664423942566, 'timestamp': '2025-09-30 22:17:14.840866', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:14.904513', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.20741887390613556, 'timestamp': '2025-09-30 22:17:14.907962', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:14.968228', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.18265870213508606, 'timestamp': '2025-09-30 22:17:14.975028', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:15.031444', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.20491228997707367, 'timestamp': '2025-09-30 22:17:15.035346', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:15.092140', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.23858225345611572, 'timestamp': '2025-09-30 22:17:15.095749', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:15.153904', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.1756429523229599, 'timestamp': '2025-09-30 22:17:15.156982', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.227748', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.22199124097824097, 'timestamp': '2025-09-30 22:17:15.234965', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:15.291435', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.14894531667232513, 'timestamp': '2025-09-30 22:17:15.294092', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.351275', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.16474378108978271, 'timestamp': '2025-09-30 22:17:15.354254', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.410997', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.2973635494709015, 'timestamp': '2025-09-30 22:17:15.419102', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:15.476594', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.15440109372138977, 'timestamp': '2025-09-30 22:17:15.483551', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.540344', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.2285670042037964, 'timestamp': '2025-09-30 22:17:15.544021', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:15.601322', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.15138299763202667, 'timestamp': '2025-09-30 22:17:15.604102', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:15.676274', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.12450204789638519, 'timestamp': '2025-09-30 22:17:15.679527', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.742620', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.10072515904903412, 'timestamp': '2025-09-30 22:17:15.749746', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:15.805706', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.1484702229499817, 'timestamp': '2025-09-30 22:17:15.808412', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:15.865187', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.18999835848808289, 'timestamp': '2025-09-30 22:17:15.872777', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:15.930747', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.20364724099636078, 'timestamp': '2025-09-30 22:17:15.934900', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:15.992736', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.11173073947429657, 'timestamp': '2025-09-30 22:17:15.999235', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:16.059403', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.22617079317569733, 'timestamp': '2025-09-30 22:17:16.065519', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:16.122958', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.16095468401908875, 'timestamp': '2025-09-30 22:17:16.131000', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:16.191701', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.1872490644454956, 'timestamp': '2025-09-30 22:17:16.195635', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:16.252151', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.14350992441177368, 'timestamp': '2025-09-30 22:17:16.265936', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:16.323253', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.17399117350578308, 'timestamp': '2025-09-30 22:17:16.326985', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:16.384604', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.22649069130420685, 'timestamp': '2025-09-30 22:17:16.387620', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:16.445563', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.09946607053279877, 'timestamp': '2025-09-30 22:17:16.448459', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:16.505579', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.17509646713733673, 'timestamp': '2025-09-30 22:17:16.511839', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:16.567714', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.2810453474521637, 'timestamp': '2025-09-30 22:17:16.574970', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:16.636295', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.1937369853258133, 'timestamp': '2025-09-30 22:17:16.639351', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:16.696030', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.11768612265586853, 'timestamp': '2025-09-30 22:17:16.702443', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:16.759875', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.18247352540493011, 'timestamp': '2025-09-30 22:17:16.767384', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:16.826326', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.22929629683494568, 'timestamp': '2025-09-30 22:17:16.830378', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:16.888395', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.17407624423503876, 'timestamp': '2025-09-30 22:17:16.894255', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:16.956301', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.23283806443214417, 'timestamp': '2025-09-30 22:17:16.959304', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:17.016554', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.1576833873987198, 'timestamp': '2025-09-30 22:17:17.027060', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:17.084502', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.16179205477237701, 'timestamp': '2025-09-30 22:17:17.087351', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:17.152231', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.153546541929245, 'timestamp': '2025-09-30 22:17:17.165397', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:17.228416', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.17270489037036896, 'timestamp': '2025-09-30 22:17:17.231658', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:17.298908', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.211192786693573, 'timestamp': '2025-09-30 22:17:17.308117', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:17.366629', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.09909211844205856, 'timestamp': '2025-09-30 22:17:17.376470', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:17:17.436646', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.15852218866348267, 'timestamp': '2025-09-30 22:17:17.445097', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:17.504565', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.155919149518013, 'timestamp': '2025-09-30 22:17:17.510894', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:17.573096', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.0981365293264389, 'timestamp': '2025-09-30 22:17:17.585189', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:17.641995', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.1458289623260498, 'timestamp': '2025-09-30 22:17:17.646168', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:17.713083', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.15341560542583466, 'timestamp': '2025-09-30 22:17:17.718632', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:17.775368', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.13632763922214508, 'timestamp': '2025-09-30 22:17:17.779541', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:17.839648', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09244558215141296, 'timestamp': '2025-09-30 22:17:17.846441', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:17.907231', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.1635894626379013, 'timestamp': '2025-09-30 22:17:17.918569', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:17:17.989397', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.13968461751937866, 'timestamp': '2025-09-30 22:17:17.992322', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:18.051098', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.18939530849456787, 'timestamp': '2025-09-30 22:17:18.065399', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:18.121993', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.16192340850830078, 'timestamp': '2025-09-30 22:17:18.134254', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:18.193740', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.11521193385124207, 'timestamp': '2025-09-30 22:17:18.196956', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:18.256426', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.14937837421894073, 'timestamp': '2025-09-30 22:17:18.265349', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:18.325158', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.15507693588733673, 'timestamp': '2025-09-30 22:17:18.335994', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:18.398911', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.20549264550209045, 'timestamp': '2025-09-30 22:17:18.405856', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:18.473134', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.1617080271244049, 'timestamp': '2025-09-30 22:17:18.480002', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:18.545763', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.25001007318496704, 'timestamp': '2025-09-30 22:17:18.552383', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:18.610615', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.1424253135919571, 'timestamp': '2025-09-30 22:17:18.623391', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:18.680888', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.22127942740917206, 'timestamp': '2025-09-30 22:17:18.687643', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-30 22:17:19.114812', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:19.174149', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.16974686086177826, 'timestamp': '2025-09-30 22:17:19.178071', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:19.237026', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.13555662333965302, 'timestamp': '2025-09-30 22:17:19.245022', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:19.302406', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.13506875932216644, 'timestamp': '2025-09-30 22:17:19.309818', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:19.366253', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.20331981778144836, 'timestamp': '2025-09-30 22:17:19.376609', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:19.436689', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.2021474838256836, 'timestamp': '2025-09-30 22:17:19.439250', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:19.500125', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.21846948564052582, 'timestamp': '2025-09-30 22:17:19.504661', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:19.565891', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.17149385809898376, 'timestamp': '2025-09-30 22:17:19.571687', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:19.634990', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.14065653085708618, 'timestamp': '2025-09-30 22:17:19.648639', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:19.712241', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.2250262051820755, 'timestamp': '2025-09-30 22:17:19.718104', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:19.779251', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.12826551496982574, 'timestamp': '2025-09-30 22:17:19.787102', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:19.854764', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.22359582781791687, 'timestamp': '2025-09-30 22:17:19.864576', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:19.923191', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.12841393053531647, 'timestamp': '2025-09-30 22:17:19.933234', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:19.992528', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.2706992030143738, 'timestamp': '2025-09-30 22:17:19.997858', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:20.056504', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.15461987257003784, 'timestamp': '2025-09-30 22:17:20.059507', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:20.125243', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.16334596276283264, 'timestamp': '2025-09-30 22:17:20.128979', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:20.186132', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.21140511333942413, 'timestamp': '2025-09-30 22:17:20.194182', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.258571', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.11646895855665207, 'timestamp': '2025-09-30 22:17:20.269354', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:20.333403', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.15815192461013794, 'timestamp': '2025-09-30 22:17:20.340161', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.398430', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.1782589554786682, 'timestamp': '2025-09-30 22:17:20.409389', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.466275', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.11201012879610062, 'timestamp': '2025-09-30 22:17:20.472461', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:20.534232', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.1599280834197998, 'timestamp': '2025-09-30 22:17:20.539978', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:20.605762', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.3076224625110626, 'timestamp': '2025-09-30 22:17:20.608764', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.666651', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12350797653198242, 'timestamp': '2025-09-30 22:17:20.669351', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:20.725610', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.19227492809295654, 'timestamp': '2025-09-30 22:17:20.732001', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.789124', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.14939916133880615, 'timestamp': '2025-09-30 22:17:20.800861', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:20.861986', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.15399053692817688, 'timestamp': '2025-09-30 22:17:20.864713', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:20.922009', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.15880803763866425, 'timestamp': '2025-09-30 22:17:20.924847', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:20.981120', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.18392746150493622, 'timestamp': '2025-09-30 22:17:20.988012', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.044755', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.1996905654668808, 'timestamp': '2025-09-30 22:17:21.051207', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:21.108454', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.16730262339115143, 'timestamp': '2025-09-30 22:17:21.118358', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:21.182349', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.16718894243240356, 'timestamp': '2025-09-30 22:17:21.190556', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:21.247196', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.17373532056808472, 'timestamp': '2025-09-30 22:17:21.266695', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:21.322841', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.1766696721315384, 'timestamp': '2025-09-30 22:17:21.331311', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:21.391404', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.1784161925315857, 'timestamp': '2025-09-30 22:17:21.394629', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.452324', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.17623217403888702, 'timestamp': '2025-09-30 22:17:21.456155', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:21.513054', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.22415581345558167, 'timestamp': '2025-09-30 22:17:21.519443', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.576033', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.1853581666946411, 'timestamp': '2025-09-30 22:17:21.580048', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.637357', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.19867168366909027, 'timestamp': '2025-09-30 22:17:21.650832', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.707791', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.15030239522457123, 'timestamp': '2025-09-30 22:17:21.711765', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:21.770301', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.08050362020730972, 'timestamp': '2025-09-30 22:17:21.777699', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.834459', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.15936990082263947, 'timestamp': '2025-09-30 22:17:21.837339', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.895157', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.1364806443452835, 'timestamp': '2025-09-30 22:17:21.898261', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:21.954738', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20337934792041779, 'timestamp': '2025-09-30 22:17:21.958160', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:22.015058', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.1558634489774704, 'timestamp': '2025-09-30 22:17:22.023625', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:22.080556', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.1569920927286148, 'timestamp': '2025-09-30 22:17:22.090031', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.147294', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.12004471570253372, 'timestamp': '2025-09-30 22:17:22.150768', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:22.206735', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.15156206488609314, 'timestamp': '2025-09-30 22:17:22.213903', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:22.274988', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.22267287969589233, 'timestamp': '2025-09-30 22:17:22.281281', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.336934', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.14222118258476257, 'timestamp': '2025-09-30 22:17:22.340038', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.396743', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.2168971598148346, 'timestamp': '2025-09-30 22:17:22.405564', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.466162', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.1744864135980606, 'timestamp': '2025-09-30 22:17:22.469686', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:22.529860', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.18346962332725525, 'timestamp': '2025-09-30 22:17:22.536812', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:22.598319', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.08583156764507294, 'timestamp': '2025-09-30 22:17:22.601319', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.657738', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.23733749985694885, 'timestamp': '2025-09-30 22:17:22.661138', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:22.717863', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.16041100025177002, 'timestamp': '2025-09-30 22:17:22.720720', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.777288', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.15089935064315796, 'timestamp': '2025-09-30 22:17:22.789640', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.846127', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.20914724469184875, 'timestamp': '2025-09-30 22:17:22.848955', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:22.905600', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.2247769832611084, 'timestamp': '2025-09-30 22:17:22.908260', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:22.969616', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.1818849891424179, 'timestamp': '2025-09-30 22:17:22.973539', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.032029', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.2150404304265976, 'timestamp': '2025-09-30 22:17:23.041886', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:23.097170', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.2530224621295929, 'timestamp': '2025-09-30 22:17:23.106163', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:23.162270', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.17625705897808075, 'timestamp': '2025-09-30 22:17:23.165088', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.221672', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.207815021276474, 'timestamp': '2025-09-30 22:17:23.224814', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:23.281772', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.26187410950660706, 'timestamp': '2025-09-30 22:17:23.293791', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.350647', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.17016485333442688, 'timestamp': '2025-09-30 22:17:23.358998', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.420275', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.14712902903556824, 'timestamp': '2025-09-30 22:17:23.424069', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:23.492277', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.21115314960479736, 'timestamp': '2025-09-30 22:17:23.495722', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.552340', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.11947493255138397, 'timestamp': '2025-09-30 22:17:23.558878', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:23.614900', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.17240770161151886, 'timestamp': '2025-09-30 22:17:23.622409', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:23.679747', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.15416862070560455, 'timestamp': '2025-09-30 22:17:23.686025', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.748579', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.10698235034942627, 'timestamp': '2025-09-30 22:17:23.756111', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.813358', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.15885388851165771, 'timestamp': '2025-09-30 22:17:23.819701', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:23.878415', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.245090514421463, 'timestamp': '2025-09-30 22:17:23.886048', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:23.943742', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.08350313454866409, 'timestamp': '2025-09-30 22:17:23.948129', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:24.004725', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.173477903008461, 'timestamp': '2025-09-30 22:17:24.008190', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:24.071476', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.1846899688243866, 'timestamp': '2025-09-30 22:17:24.079279', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:24.134576', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.1852274239063263, 'timestamp': '2025-09-30 22:17:24.138708', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.195400', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.14521938562393188, 'timestamp': '2025-09-30 22:17:24.198167', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.258952', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11707688868045807, 'timestamp': '2025-09-30 22:17:24.263430', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.319990', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.25986501574516296, 'timestamp': '2025-09-30 22:17:24.326248', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.384064', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.15791857242584229, 'timestamp': '2025-09-30 22:17:24.391900', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.454531', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.20530752837657928, 'timestamp': '2025-09-30 22:17:24.458342', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.515021', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.1252792775630951, 'timestamp': '2025-09-30 22:17:24.518530', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:24.575242', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.18577514588832855, 'timestamp': '2025-09-30 22:17:24.581988', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:24.637473', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.2057010382413864, 'timestamp': '2025-09-30 22:17:24.641991', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.698736', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.13409240543842316, 'timestamp': '2025-09-30 22:17:24.702116', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:24.763754', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.17256246507167816, 'timestamp': '2025-09-30 22:17:24.771528', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:24.828979', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.1890757977962494, 'timestamp': '2025-09-30 22:17:24.836710', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:24.897577', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.10870763659477234, 'timestamp': '2025-09-30 22:17:24.908748', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:24.972152', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.055191829800605774, 'timestamp': '2025-09-30 22:17:24.978369', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:25.039653', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.13984555006027222, 'timestamp': '2025-09-30 22:17:25.042766', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:25.099759', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.1352231651544571, 'timestamp': '2025-09-30 22:17:25.111335', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:25.171763', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.17625002562999725, 'timestamp': '2025-09-30 22:17:25.175185', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.237336', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.13360971212387085, 'timestamp': '2025-09-30 22:17:25.240204', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:25.315040', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.18293361365795135, 'timestamp': '2025-09-30 22:17:25.328024', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.396882', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.20293939113616943, 'timestamp': '2025-09-30 22:17:25.403938', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.461959', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.13310782611370087, 'timestamp': '2025-09-30 22:17:25.465934', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:25.523632', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.15307870507240295, 'timestamp': '2025-09-30 22:17:25.527266', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.585288', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.2354496568441391, 'timestamp': '2025-09-30 22:17:25.590528', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.650513', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.16628798842430115, 'timestamp': '2025-09-30 22:17:25.657185', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.724924', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.19432790577411652, 'timestamp': '2025-09-30 22:17:25.728060', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:25.784780', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.15163494646549225, 'timestamp': '2025-09-30 22:17:25.788971', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:25.846852', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.25396668910980225, 'timestamp': '2025-09-30 22:17:25.850274', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:25.922646', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.12206915020942688, 'timestamp': '2025-09-30 22:17:25.929698', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:25.993398', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.09190981835126877, 'timestamp': '2025-09-30 22:17:25.996405', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:26.053129', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.17451906204223633, 'timestamp': '2025-09-30 22:17:26.061485', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:26.118575', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.21218349039554596, 'timestamp': '2025-09-30 22:17:26.129636', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:26.187608', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.18544504046440125, 'timestamp': '2025-09-30 22:17:26.195365', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:26.252064', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.17630116641521454, 'timestamp': '2025-09-30 22:17:26.259700', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:26.320482', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.12999877333641052, 'timestamp': '2025-09-30 22:17:26.326860', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:26.384548', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.28812918066978455, 'timestamp': '2025-09-30 22:17:26.388278', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:26.445352', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.12315048277378082, 'timestamp': '2025-09-30 22:17:26.463072', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:26.521713', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.1761590838432312, 'timestamp': '2025-09-30 22:17:26.529610', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:26.587584', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.1488420069217682, 'timestamp': '2025-09-30 22:17:26.591084', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:26.648953', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.12640297412872314, 'timestamp': '2025-09-30 22:17:26.655795', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:26.717012', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.07860716432332993, 'timestamp': '2025-09-30 22:17:26.724274', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:26.786243', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.15322153270244598, 'timestamp': '2025-09-30 22:17:26.803336', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:26.860551', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.1351243406534195, 'timestamp': '2025-09-30 22:17:26.869477', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:26.926418', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.19669991731643677, 'timestamp': '2025-09-30 22:17:26.929186', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:26.985454', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.1612503081560135, 'timestamp': '2025-09-30 22:17:26.991879', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.047851', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.2140759974718094, 'timestamp': '2025-09-30 22:17:27.052122', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:27.108543', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.1470792293548584, 'timestamp': '2025-09-30 22:17:27.112758', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.169296', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.22816860675811768, 'timestamp': '2025-09-30 22:17:27.173775', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.230935', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.17582795023918152, 'timestamp': '2025-09-30 22:17:27.238787', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:27.296503', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.1994769424200058, 'timestamp': '2025-09-30 22:17:27.301884', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.361256', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.22967036068439484, 'timestamp': '2025-09-30 22:17:27.366628', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:27.424635', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.11739479005336761, 'timestamp': '2025-09-30 22:17:27.428560', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:27.487334', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.13792872428894043, 'timestamp': '2025-09-30 22:17:27.496141', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:27.551544', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.09737325459718704, 'timestamp': '2025-09-30 22:17:27.556768', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:27.615551', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.17180123925209045, 'timestamp': '2025-09-30 22:17:27.620210', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.677681', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.22500355541706085, 'timestamp': '2025-09-30 22:17:27.682656', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:27.741359', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.1362793743610382, 'timestamp': '2025-09-30 22:17:27.748220', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:27.805761', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.2240617871284485, 'timestamp': '2025-09-30 22:17:27.812663', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:27.872984', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.20496580004692078, 'timestamp': '2025-09-30 22:17:27.876917', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.934978', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.17179694771766663, 'timestamp': '2025-09-30 22:17:27.938973', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:27.997837', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.17387498915195465, 'timestamp': '2025-09-30 22:17:28.003962', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.061643', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.09981058537960052, 'timestamp': '2025-09-30 22:17:28.065631', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:28.121740', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.20525772869586945, 'timestamp': '2025-09-30 22:17:28.124467', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.180727', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.18964436650276184, 'timestamp': '2025-09-30 22:17:28.183330', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.242122', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.12372542917728424, 'timestamp': '2025-09-30 22:17:28.250445', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.306032', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.14636698365211487, 'timestamp': '2025-09-30 22:17:28.308546', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.374944', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.10190311074256897, 'timestamp': '2025-09-30 22:17:28.377561', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.434522', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.14816203713417053, 'timestamp': '2025-09-30 22:17:28.439127', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.497242', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.19120733439922333, 'timestamp': '2025-09-30 22:17:28.505008', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.561347', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.18092584609985352, 'timestamp': '2025-09-30 22:17:28.564359', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.626305', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.20555508136749268, 'timestamp': '2025-09-30 22:17:28.630659', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:28.690892', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.20003145933151245, 'timestamp': '2025-09-30 22:17:28.693556', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:28.750098', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.11093170940876007, 'timestamp': '2025-09-30 22:17:28.756116', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:28.814586', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.08356863260269165, 'timestamp': '2025-09-30 22:17:28.817652', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:28.874377', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.21631833910942078, 'timestamp': '2025-09-30 22:17:28.879102', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.936049', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.17075936496257782, 'timestamp': '2025-09-30 22:17:28.938753', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:28.997634', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.10679294168949127, 'timestamp': '2025-09-30 22:17:29.005625', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:29.065267', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.1878119856119156, 'timestamp': '2025-09-30 22:17:29.070886', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:29.127860', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.19681672751903534, 'timestamp': '2025-09-30 22:17:29.130737', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.190572', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.07491900026798248, 'timestamp': '2025-09-30 22:17:29.195557', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.252766', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.1677139699459076, 'timestamp': '2025-09-30 22:17:29.261117', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:29.317569', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.21011659502983093, 'timestamp': '2025-09-30 22:17:29.320869', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:29.389554', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.16867691278457642, 'timestamp': '2025-09-30 22:17:29.395231', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.455400', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.205117329955101, 'timestamp': '2025-09-30 22:17:29.457990', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.516853', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.22429850697517395, 'timestamp': '2025-09-30 22:17:29.523625', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.580568', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.13905330002307892, 'timestamp': '2025-09-30 22:17:29.585006', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:29.643614', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.19693498313426971, 'timestamp': '2025-09-30 22:17:29.648355', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.705399', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.13185392320156097, 'timestamp': '2025-09-30 22:17:29.708045', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:29.765052', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.09089021384716034, 'timestamp': '2025-09-30 22:17:29.773058', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.833018', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.14812326431274414, 'timestamp': '2025-09-30 22:17:29.836271', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.894634', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.15674975514411926, 'timestamp': '2025-09-30 22:17:29.903423', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:29.962421', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.15255048871040344, 'timestamp': '2025-09-30 22:17:29.965182', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.023731', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.11138875782489777, 'timestamp': '2025-09-30 22:17:30.030029', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:30.089120', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.11165887862443924, 'timestamp': '2025-09-30 22:17:30.091956', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:30.154152', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.11879950761795044, 'timestamp': '2025-09-30 22:17:30.156872', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:30.215791', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.13855992257595062, 'timestamp': '2025-09-30 22:17:30.218573', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:30.275603', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.1445317566394806, 'timestamp': '2025-09-30 22:17:30.283238', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:30.339056', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.16528819501399994, 'timestamp': '2025-09-30 22:17:30.342914', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:30.399130', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.11119195073843002, 'timestamp': '2025-09-30 22:17:30.402499', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.461674', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.10073442757129669, 'timestamp': '2025-09-30 22:17:30.464456', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:30.533395', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.09319153428077698, 'timestamp': '2025-09-30 22:17:30.540220', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.595935', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.1756165325641632, 'timestamp': '2025-09-30 22:17:30.598636', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.656735', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.1610659509897232, 'timestamp': '2025-09-30 22:17:30.663596', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:30.719707', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.2146347016096115, 'timestamp': '2025-09-30 22:17:30.722745', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:30.780242', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.1411963701248169, 'timestamp': '2025-09-30 22:17:30.786796', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.843303', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.05190291255712509, 'timestamp': '2025-09-30 22:17:30.846054', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.903869', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.14556841552257538, 'timestamp': '2025-09-30 22:17:30.906425', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:30.964155', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.23512765765190125, 'timestamp': '2025-09-30 22:17:30.972749', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:31.030239', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.13112731277942657, 'timestamp': '2025-09-30 22:17:31.036553', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:31.093585', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.15304550528526306, 'timestamp': '2025-09-30 22:17:31.096526', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:31.152864', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.10964647680521011, 'timestamp': '2025-09-30 22:17:31.158875', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:31.221086', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.2267858237028122, 'timestamp': '2025-09-30 22:17:31.227267', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:31.283531', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.10939601808786392, 'timestamp': '2025-09-30 22:17:31.291042', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:31.348255', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.12947121262550354, 'timestamp': '2025-09-30 22:17:31.353274', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:31.411623', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.24779513478279114, 'timestamp': '2025-09-30 22:17:31.416640', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:31.480925', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.12633991241455078, 'timestamp': '2025-09-30 22:17:31.486843', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:31.562144', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.14945992827415466, 'timestamp': '2025-09-30 22:17:31.571288', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:31.627354', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.17085230350494385, 'timestamp': '2025-09-30 22:17:31.632348', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:31.692128', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.23948492109775543, 'timestamp': '2025-09-30 22:17:31.694975', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:17:45.173595', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 14013.350870083308, 'timestamp': '2025-09-30 22:17:45.182851', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:45.239527', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.18234914541244507, 'timestamp': '2025-09-30 22:17:45.243690', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:45.321378', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.12832534313201904, 'timestamp': '2025-09-30 22:17:45.328271', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.385709', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.1325136423110962, 'timestamp': '2025-09-30 22:17:45.388918', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.450064', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.14469192922115326, 'timestamp': '2025-09-30 22:17:45.457790', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:45.519471', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.132550448179245, 'timestamp': '2025-09-30 22:17:45.522920', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.586161', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.16773609817028046, 'timestamp': '2025-09-30 22:17:45.592684', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.665165', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.11193355172872543, 'timestamp': '2025-09-30 22:17:45.672658', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.730216', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.23732399940490723, 'timestamp': '2025-09-30 22:17:45.732935', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:45.790155', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.21804279088974, 'timestamp': '2025-09-30 22:17:45.793384', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:45.853181', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.25837305188179016, 'timestamp': '2025-09-30 22:17:45.859509', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:45.916115', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.1405479460954666, 'timestamp': '2025-09-30 22:17:45.918397', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:45.977345', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.172744482755661, 'timestamp': '2025-09-30 22:17:45.979870', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.038629', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.14770427346229553, 'timestamp': '2025-09-30 22:17:46.041420', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.100954', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.15588724613189697, 'timestamp': '2025-09-30 22:17:46.107470', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.164703', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.21439822018146515, 'timestamp': '2025-09-30 22:17:46.168045', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.225501', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.2001386284828186, 'timestamp': '2025-09-30 22:17:46.227937', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.285771', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.12455464899539948, 'timestamp': '2025-09-30 22:17:46.288292', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.345689', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.29611334204673767, 'timestamp': '2025-09-30 22:17:46.353066', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.411541', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.13024000823497772, 'timestamp': '2025-09-30 22:17:46.416203', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.475347', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.172957643866539, 'timestamp': '2025-09-30 22:17:46.478025', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.535256', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.19078989326953888, 'timestamp': '2025-09-30 22:17:46.537617', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.598401', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.205023854970932, 'timestamp': '2025-09-30 22:17:46.604491', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.676831', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.18014493584632874, 'timestamp': '2025-09-30 22:17:46.679430', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.737194', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.17618030309677124, 'timestamp': '2025-09-30 22:17:46.740139', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.798467', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.18896877765655518, 'timestamp': '2025-09-30 22:17:46.800813', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:46.870855', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.13254320621490479, 'timestamp': '2025-09-30 22:17:46.876897', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:46.932898', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.0968451276421547, 'timestamp': '2025-09-30 22:17:46.938869', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:46.997167', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.12500210106372833, 'timestamp': '2025-09-30 22:17:46.999967', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.062030', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.19666296243667603, 'timestamp': '2025-09-30 22:17:47.067985', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.131438', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.19693613052368164, 'timestamp': '2025-09-30 22:17:47.140163', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.213510', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.14248840510845184, 'timestamp': '2025-09-30 22:17:47.216137', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:47.273444', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.22098557651042938, 'timestamp': '2025-09-30 22:17:47.280780', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:47.339559', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.27724963426589966, 'timestamp': '2025-09-30 22:17:47.342580', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.402820', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.17146281898021698, 'timestamp': '2025-09-30 22:17:47.409016', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:47.465253', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.0819738581776619, 'timestamp': '2025-09-30 22:17:47.467936', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:47.525117', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.14081257581710815, 'timestamp': '2025-09-30 22:17:47.528871', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:47.595442', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.10442155599594116, 'timestamp': '2025-09-30 22:17:47.598466', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.655710', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.15172168612480164, 'timestamp': '2025-09-30 22:17:47.662215', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:47.719773', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.22769325971603394, 'timestamp': '2025-09-30 22:17:47.722240', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:47.779433', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.24045389890670776, 'timestamp': '2025-09-30 22:17:47.784840', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:47.841834', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.2976594865322113, 'timestamp': '2025-09-30 22:17:47.848661', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:47.923041', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.19884911179542542, 'timestamp': '2025-09-30 22:17:47.929561', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:47.985829', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.19194704294204712, 'timestamp': '2025-09-30 22:17:47.991149', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.047956', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.15809544920921326, 'timestamp': '2025-09-30 22:17:48.050513', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.107223', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.14341725409030914, 'timestamp': '2025-09-30 22:17:48.109708', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.168895', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.06167537346482277, 'timestamp': '2025-09-30 22:17:48.175078', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:48.231323', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.11295250058174133, 'timestamp': '2025-09-30 22:17:48.234881', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:48.292303', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.10832738131284714, 'timestamp': '2025-09-30 22:17:48.306336', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:48.375970', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.2229844331741333, 'timestamp': '2025-09-30 22:17:48.378926', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.435532', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.21084590256214142, 'timestamp': '2025-09-30 22:17:48.445711', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.501107', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.12040049582719803, 'timestamp': '2025-09-30 22:17:48.504232', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.567254', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.16623130440711975, 'timestamp': '2025-09-30 22:17:48.570172', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.627446', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.27976900339126587, 'timestamp': '2025-09-30 22:17:48.631248', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:48.688115', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.1618722826242447, 'timestamp': '2025-09-30 22:17:48.697442', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.754419', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.24439799785614014, 'timestamp': '2025-09-30 22:17:48.756810', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:48.813868', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.27260687947273254, 'timestamp': '2025-09-30 22:17:48.816984', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:48.873702', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.18602211773395538, 'timestamp': '2025-09-30 22:17:48.876618', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:48.933449', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.1234174594283104, 'timestamp': '2025-09-30 22:17:48.944857', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:49.001012', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13463017344474792, 'timestamp': '2025-09-30 22:17:49.009163', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:49.066402', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.19102732837200165, 'timestamp': '2025-09-30 22:17:49.072758', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:49.132346', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.13958437740802765, 'timestamp': '2025-09-30 22:17:49.140077', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:17:49.198828', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.11707563698291779, 'timestamp': '2025-09-30 22:17:49.208656', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:49.267136', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.24542902410030365, 'timestamp': '2025-09-30 22:17:49.274570', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:49.331789', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.27335476875305176, 'timestamp': '2025-09-30 22:17:49.335350', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:49.398732', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.16267867386341095, 'timestamp': '2025-09-30 22:17:49.411354', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:49.469188', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.1360635906457901, 'timestamp': '2025-09-30 22:17:49.475706', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:49.543157', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.18267999589443207, 'timestamp': '2025-09-30 22:17:49.546685', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:49.612154', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.1605520397424698, 'timestamp': '2025-09-30 22:17:49.619199', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:49.681866', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.18773779273033142, 'timestamp': '2025-09-30 22:17:49.685640', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:49.745157', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.0851854458451271, 'timestamp': '2025-09-30 22:17:49.756461', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:49.820079', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.12988321483135223, 'timestamp': '2025-09-30 22:17:49.822987', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:49.879754', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.21167100965976715, 'timestamp': '2025-09-30 22:17:49.882671', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:49.953167', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.15246716141700745, 'timestamp': '2025-09-30 22:17:49.972559', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:50.036207', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.19641825556755066, 'timestamp': '2025-09-30 22:17:50.053809', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:50.129557', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.22998350858688354, 'timestamp': '2025-09-30 22:17:50.145252', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:50.218380', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.0929231122136116, 'timestamp': '2025-09-30 22:17:50.226545', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:50.292738', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.16205839812755585, 'timestamp': '2025-09-30 22:17:50.303429', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:50.367133', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.1282133013010025, 'timestamp': '2025-09-30 22:17:50.384415', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:50.489412', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.1751350313425064, 'timestamp': '2025-09-30 22:17:50.499399', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:50.564781', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.22875311970710754, 'timestamp': '2025-09-30 22:17:50.574246', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:50.646168', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.1449529230594635, 'timestamp': '2025-09-30 22:17:50.651179', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:50.714048', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.16183783113956451, 'timestamp': '2025-09-30 22:17:50.736416', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:50.817978', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.10214155912399292, 'timestamp': '2025-09-30 22:17:50.833144', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:50.903560', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.10316126048564911, 'timestamp': '2025-09-30 22:17:50.913519', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:50.986338', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.1807364523410797, 'timestamp': '2025-09-30 22:17:50.993579', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:51.061707', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.23290443420410156, 'timestamp': '2025-09-30 22:17:51.086268', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.153141', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.13757239282131195, 'timestamp': '2025-09-30 22:17:51.161976', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:51.225854', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.09881668537855148, 'timestamp': '2025-09-30 22:17:51.237449', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:51.304443', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.16301082074642181, 'timestamp': '2025-09-30 22:17:51.321313', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:51.385287', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.20941190421581268, 'timestamp': '2025-09-30 22:17:51.399027', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.458531', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.16702231764793396, 'timestamp': '2025-09-30 22:17:51.461282', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.523459', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.18401101231575012, 'timestamp': '2025-09-30 22:17:51.527002', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:51.584710', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.1926792860031128, 'timestamp': '2025-09-30 22:17:51.587940', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.648134', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.16721810400485992, 'timestamp': '2025-09-30 22:17:51.654506', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:51.711462', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.1352473944425583, 'timestamp': '2025-09-30 22:17:51.714529', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.771381', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.24049554765224457, 'timestamp': '2025-09-30 22:17:51.774339', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:51.838411', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.1277187019586563, 'timestamp': '2025-09-30 22:17:51.845144', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:51.906155', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.1541549265384674, 'timestamp': '2025-09-30 22:17:51.912278', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:51.967754', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.29110532999038696, 'timestamp': '2025-09-30 22:17:51.971252', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:52.033858', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.11292312294244766, 'timestamp': '2025-09-30 22:17:52.037872', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:52.103954', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.13289180397987366, 'timestamp': '2025-09-30 22:17:52.112900', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:52.179481', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.16043324768543243, 'timestamp': '2025-09-30 22:17:52.192860', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.256461', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.20155049860477448, 'timestamp': '2025-09-30 22:17:52.261153', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:52.320125', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.13113577663898468, 'timestamp': '2025-09-30 22:17:52.323599', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.380983', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.2764514088630676, 'timestamp': '2025-09-30 22:17:52.384043', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.450968', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.17894740402698517, 'timestamp': '2025-09-30 22:17:52.464452', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:17:52.527059', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.19844211637973785, 'timestamp': '2025-09-30 22:17:52.530914', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:52.589433', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.11796960979700089, 'timestamp': '2025-09-30 22:17:52.592324', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.652217', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.17527812719345093, 'timestamp': '2025-09-30 22:17:52.655097', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.718989', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.13530988991260529, 'timestamp': '2025-09-30 22:17:52.725740', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:52.782902', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.17998936772346497, 'timestamp': '2025-09-30 22:17:52.786367', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:52.843419', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.10619131475687027, 'timestamp': '2025-09-30 22:17:52.847251', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:52.904392', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.12291579693555832, 'timestamp': '2025-09-30 22:17:52.913845', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:52.979323', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.15035869181156158, 'timestamp': '2025-09-30 22:17:52.986964', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.060128', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.1563863605260849, 'timestamp': '2025-09-30 22:17:53.064001', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.129967', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.21172159910202026, 'timestamp': '2025-09-30 22:17:53.132962', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.190285', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.07948964834213257, 'timestamp': '2025-09-30 22:17:53.192874', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.250959', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.17497944831848145, 'timestamp': '2025-09-30 22:17:53.258198', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.316867', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.1380102038383484, 'timestamp': '2025-09-30 22:17:53.329687', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.388524', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.2024606466293335, 'timestamp': '2025-09-30 22:17:53.393220', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:53.461484', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.2045048624277115, 'timestamp': '2025-09-30 22:17:53.464982', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.522850', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.12959159910678864, 'timestamp': '2025-09-30 22:17:53.529216', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:53.594431', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.2867019772529602, 'timestamp': '2025-09-30 22:17:53.597729', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:53.655217', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.12736822664737701, 'timestamp': '2025-09-30 22:17:53.659748', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.718802', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.16822269558906555, 'timestamp': '2025-09-30 22:17:53.724895', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.782001', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.17455217242240906, 'timestamp': '2025-09-30 22:17:53.788041', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.857078', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.13519392907619476, 'timestamp': '2025-09-30 22:17:53.860271', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:53.917647', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.1900259256362915, 'timestamp': '2025-09-30 22:17:53.922137', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:53.983328', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.22927488386631012, 'timestamp': '2025-09-30 22:17:53.990909', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:54.052612', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.1947685331106186, 'timestamp': '2025-09-30 22:17:54.058709', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:54.126536', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.13380353152751923, 'timestamp': '2025-09-30 22:17:54.129689', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.190804', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.15701647102832794, 'timestamp': '2025-09-30 22:17:54.194263', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:54.257030', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.30637332797050476, 'timestamp': '2025-09-30 22:17:54.267566', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.325217', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.17091675102710724, 'timestamp': '2025-09-30 22:17:54.332626', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.391772', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.1621236950159073, 'timestamp': '2025-09-30 22:17:54.395726', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:54.454615', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.20355215668678284, 'timestamp': '2025-09-30 22:17:54.458655', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:54.516388', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.1950233280658722, 'timestamp': '2025-09-30 22:17:54.525034', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.584947', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.08839984983205795, 'timestamp': '2025-09-30 22:17:54.591979', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.651455', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.1324867308139801, 'timestamp': '2025-09-30 22:17:54.654768', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:54.711866', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.0930740013718605, 'timestamp': '2025-09-30 22:17:54.717921', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.778703', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11526418477296829, 'timestamp': '2025-09-30 22:17:54.784989', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:54.845002', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.1794624924659729, 'timestamp': '2025-09-30 22:17:54.851134', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:54.910231', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.12080840021371841, 'timestamp': '2025-09-30 22:17:54.915626', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:54.975813', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.2260928601026535, 'timestamp': '2025-09-30 22:17:54.979163', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:55.041516', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.18092185258865356, 'timestamp': '2025-09-30 22:17:55.045583', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:55.104352', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.12646116316318512, 'timestamp': '2025-09-30 22:17:55.115984', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:55.175841', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.15087799727916718, 'timestamp': '2025-09-30 22:17:55.179789', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:55.252509', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.16633005440235138, 'timestamp': '2025-09-30 22:17:55.259341', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:55.317529', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.18048974871635437, 'timestamp': '2025-09-30 22:17:55.320458', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:55.380206', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.1896953284740448, 'timestamp': '2025-09-30 22:17:55.386425', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:55.449836', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.10114073008298874, 'timestamp': '2025-09-30 22:17:55.452774', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:55.509783', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.1348746418952942, 'timestamp': '2025-09-30 22:17:55.512773', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:55.569981', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.1907787173986435, 'timestamp': '2025-09-30 22:17:55.581980', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:55.640813', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.21537499129772186, 'timestamp': '2025-09-30 22:17:55.647541', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:55.704497', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16246230900287628, 'timestamp': '2025-09-30 22:17:55.720240', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:55.778835', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.2131325751543045, 'timestamp': '2025-09-30 22:17:55.782804', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:55.842303', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.12890422344207764, 'timestamp': '2025-09-30 22:17:55.850655', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:55.921611', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.1936434954404831, 'timestamp': '2025-09-30 22:17:55.928628', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:55.984874', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.132491335272789, 'timestamp': '2025-09-30 22:17:55.992169', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.049115', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.20613302290439606, 'timestamp': '2025-09-30 22:17:56.056369', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:56.116697', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.21781179308891296, 'timestamp': '2025-09-30 22:17:56.120505', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:56.189810', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.07889250665903091, 'timestamp': '2025-09-30 22:17:56.196551', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.253569', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.23693469166755676, 'timestamp': '2025-09-30 22:17:56.262161', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.321636', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.1819380521774292, 'timestamp': '2025-09-30 22:17:56.325418', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.382298', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.21730050444602966, 'timestamp': '2025-09-30 22:17:56.386202', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:56.444769', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.05161764472723007, 'timestamp': '2025-09-30 22:17:56.456849', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:56.518431', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.1314893513917923, 'timestamp': '2025-09-30 22:17:56.521073', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:56.579762', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.2469167411327362, 'timestamp': '2025-09-30 22:17:56.582421', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:56.647867', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.12223558872938156, 'timestamp': '2025-09-30 22:17:56.650544', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:56.708911', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.16734062135219574, 'timestamp': '2025-09-30 22:17:56.722285', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:56.782593', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.15228812396526337, 'timestamp': '2025-09-30 22:17:56.785570', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.842378', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.1724538803100586, 'timestamp': '2025-09-30 22:17:56.845201', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:56.905118', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.1774141788482666, 'timestamp': '2025-09-30 22:17:56.911077', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:56.967960', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.32046738266944885, 'timestamp': '2025-09-30 22:17:56.975987', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.043737', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.1409132480621338, 'timestamp': '2025-09-30 22:17:57.046206', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:57.106575', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.3125748932361603, 'timestamp': '2025-09-30 22:17:57.111173', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.170024', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.18486729264259338, 'timestamp': '2025-09-30 22:17:57.174622', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:57.243243', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.24390989542007446, 'timestamp': '2025-09-30 22:17:57.249820', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.306696', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.14862793684005737, 'timestamp': '2025-09-30 22:17:57.310850', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:57.367399', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.25914856791496277, 'timestamp': '2025-09-30 22:17:57.370306', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:57.441849', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.2725355327129364, 'timestamp': '2025-09-30 22:17:57.444643', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:57.500974', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.1714727282524109, 'timestamp': '2025-09-30 22:17:57.512297', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:57.571086', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.09614919126033783, 'timestamp': '2025-09-30 22:17:57.576748', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.634340', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.11968892812728882, 'timestamp': '2025-09-30 22:17:57.637434', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:57.694508', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.21517546474933624, 'timestamp': '2025-09-30 22:17:57.701963', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:57.763979', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.12727756798267365, 'timestamp': '2025-09-30 22:17:57.770464', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:57.829821', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.18681225180625916, 'timestamp': '2025-09-30 22:17:57.832940', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.890073', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.2624099552631378, 'timestamp': '2025-09-30 22:17:57.893577', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:57.950384', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.1320011168718338, 'timestamp': '2025-09-30 22:17:57.956005', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.013390', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.12567947804927826, 'timestamp': '2025-09-30 22:17:58.022555', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:58.081375', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.18346881866455078, 'timestamp': '2025-09-30 22:17:58.086739', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.145313', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.2011839598417282, 'timestamp': '2025-09-30 22:17:58.161739', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:58.219306', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.2008960247039795, 'timestamp': '2025-09-30 22:17:58.223287', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:58.280887', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.10438137501478195, 'timestamp': '2025-09-30 22:17:58.287923', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.361156', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.1666499525308609, 'timestamp': '2025-09-30 22:17:58.370505', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:58.434580', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.1562637835741043, 'timestamp': '2025-09-30 22:17:58.437364', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:58.493880', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.15169329941272736, 'timestamp': '2025-09-30 22:17:58.496764', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:58.553752', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.18770529329776764, 'timestamp': '2025-09-30 22:17:58.562074', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.619646', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.12914249300956726, 'timestamp': '2025-09-30 22:17:58.623507', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:58.684961', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.17888161540031433, 'timestamp': '2025-09-30 22:17:58.690731', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.748951', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.15029959380626678, 'timestamp': '2025-09-30 22:17:58.754309', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.813362', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.23424819111824036, 'timestamp': '2025-09-30 22:17:58.819339', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:58.875338', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.16303591430187225, 'timestamp': '2025-09-30 22:17:58.878239', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:58.944992', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.1687571108341217, 'timestamp': '2025-09-30 22:17:58.947629', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:59.004329', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.13362710177898407, 'timestamp': '2025-09-30 22:17:59.009920', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:59.067412', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.1741969883441925, 'timestamp': '2025-09-30 22:17:59.074998', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:59.131723', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.1172686368227005, 'timestamp': '2025-09-30 22:17:59.134922', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:59.191933', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.267932653427124, 'timestamp': '2025-09-30 22:17:59.194910', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:59.254814', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.23460528254508972, 'timestamp': '2025-09-30 22:17:59.257275', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:59.313770', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.19814419746398926, 'timestamp': '2025-09-30 22:17:59.320586', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:59.376709', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.08781652897596359, 'timestamp': '2025-09-30 22:17:59.380141', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:59.436917', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.15412189066410065, 'timestamp': '2025-09-30 22:17:59.439917', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:59.502099', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.10698288679122925, 'timestamp': '2025-09-30 22:17:59.511045', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:17:59.567567', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15732988715171814, 'timestamp': '2025-09-30 22:17:59.574581', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:17:59.630682', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.12334435433149338, 'timestamp': '2025-09-30 22:17:59.634069', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:17:59.708310', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.16624543070793152, 'timestamp': '2025-09-30 22:17:59.712088', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:17:59.772195', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.2055213451385498, 'timestamp': '2025-09-30 22:17:59.775032', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:59.838288', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.14677013456821442, 'timestamp': '2025-09-30 22:17:59.844949', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:17:59.903921', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.1531600058078766, 'timestamp': '2025-09-30 22:17:59.913410', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:17:59.973926', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.22804589569568634, 'timestamp': '2025-09-30 22:17:59.980220', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:00.037741', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.08959916979074478, 'timestamp': '2025-09-30 22:18:00.042566', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.099570', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.17408272624015808, 'timestamp': '2025-09-30 22:18:00.109485', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.165536', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.13408762216567993, 'timestamp': '2025-09-30 22:18:00.169297', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.226812', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.16700521111488342, 'timestamp': '2025-09-30 22:18:00.229299', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:00.290252', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.15200892090797424, 'timestamp': '2025-09-30 22:18:00.292771', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.349630', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.20368824899196625, 'timestamp': '2025-09-30 22:18:00.355820', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.411806', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.1481979638338089, 'timestamp': '2025-09-30 22:18:00.424899', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:00.484141', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.21025681495666504, 'timestamp': '2025-09-30 22:18:00.487516', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.546183', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.21778061985969543, 'timestamp': '2025-09-30 22:18:00.562758', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:00.620342', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.13921013474464417, 'timestamp': '2025-09-30 22:18:00.628001', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:00.685863', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.19174645841121674, 'timestamp': '2025-09-30 22:18:00.689731', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.748586', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.216201052069664, 'timestamp': '2025-09-30 22:18:00.751618', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:00.808884', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.24416770040988922, 'timestamp': '2025-09-30 22:18:00.813309', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:00.874014', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.09398520737886429, 'timestamp': '2025-09-30 22:18:00.883724', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:00.940967', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.21121008694171906, 'timestamp': '2025-09-30 22:18:00.946534', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.007861', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.13318407535552979, 'timestamp': '2025-09-30 22:18:01.011254', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:01.067927', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.18027688562870026, 'timestamp': '2025-09-30 22:18:01.072512', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.134890', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.24050791561603546, 'timestamp': '2025-09-30 22:18:01.141369', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.197225', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.19709137082099915, 'timestamp': '2025-09-30 22:18:01.200071', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.257127', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.18580995500087738, 'timestamp': '2025-09-30 22:18:01.263250', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.321503', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.2711455225944519, 'timestamp': '2025-09-30 22:18:01.324111', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.383500', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.08607166260480881, 'timestamp': '2025-09-30 22:18:01.389683', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.445567', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.2442842572927475, 'timestamp': '2025-09-30 22:18:01.448759', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:01.505776', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.1266392171382904, 'timestamp': '2025-09-30 22:18:01.509221', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:01.579157', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.17093457281589508, 'timestamp': '2025-09-30 22:18:01.582762', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.640067', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.14642442762851715, 'timestamp': '2025-09-30 22:18:01.647506', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:01.704709', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.38503164052963257, 'timestamp': '2025-09-30 22:18:01.707892', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.765229', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.15592628717422485, 'timestamp': '2025-09-30 22:18:01.770422', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.839579', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.19603414833545685, 'timestamp': '2025-09-30 22:18:01.843891', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:01.902260', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.23060975968837738, 'timestamp': '2025-09-30 22:18:01.908926', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:01.967299', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.16471432149410248, 'timestamp': '2025-09-30 22:18:01.970832', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.028990', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.12890370190143585, 'timestamp': '2025-09-30 22:18:02.031757', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.090360', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.09020679444074631, 'timestamp': '2025-09-30 22:18:02.096813', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:02.158827', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.22060354053974152, 'timestamp': '2025-09-30 22:18:02.166170', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:02.223196', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.11579468846321106, 'timestamp': '2025-09-30 22:18:02.230193', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:02.292062', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.26622387766838074, 'timestamp': '2025-09-30 22:18:02.294986', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:02.368872', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.2030896246433258, 'timestamp': '2025-09-30 22:18:02.371724', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:02.432179', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.15636876225471497, 'timestamp': '2025-09-30 22:18:02.442303', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.502096', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.15059486031532288, 'timestamp': '2025-09-30 22:18:02.504714', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.563223', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.14556610584259033, 'timestamp': '2025-09-30 22:18:02.566549', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:02.624222', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.27497386932373047, 'timestamp': '2025-09-30 22:18:02.626558', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.691951', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.12225326895713806, 'timestamp': '2025-09-30 22:18:02.699788', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.765338', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.0913982018828392, 'timestamp': '2025-09-30 22:18:02.768246', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:02.832249', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.18589381873607635, 'timestamp': '2025-09-30 22:18:02.834873', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:02.902547', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.09695377945899963, 'timestamp': '2025-09-30 22:18:02.907921', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:02.964973', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.18500269949436188, 'timestamp': '2025-09-30 22:18:02.971372', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.038726', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.16899454593658447, 'timestamp': '2025-09-30 22:18:03.043098', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.109462', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.08794369548559189, 'timestamp': '2025-09-30 22:18:03.113623', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.172069', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.18468454480171204, 'timestamp': '2025-09-30 22:18:03.176099', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.243137', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.2195391058921814, 'timestamp': '2025-09-30 22:18:03.257362', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.313509', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.14748863875865936, 'timestamp': '2025-09-30 22:18:03.316755', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.375804', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.192661851644516, 'timestamp': '2025-09-30 22:18:03.378370', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:03.442879', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.1670154631137848, 'timestamp': '2025-09-30 22:18:03.455046', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:18:03.515643', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.14884750545024872, 'timestamp': '2025-09-30 22:18:03.523480', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:03.580127', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.14455512166023254, 'timestamp': '2025-09-30 22:18:03.584189', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:03.661964', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.13864530622959137, 'timestamp': '2025-09-30 22:18:03.673439', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:03.731130', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.23936837911605835, 'timestamp': '2025-09-30 22:18:03.734543', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:03.792364', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.1269167810678482, 'timestamp': '2025-09-30 22:18:03.800022', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:03.858831', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.21114791929721832, 'timestamp': '2025-09-30 22:18:03.862865', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:03.926695', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.07766911387443542, 'timestamp': '2025-09-30 22:18:03.942371', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:04.002265', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.20726698637008667, 'timestamp': '2025-09-30 22:18:04.005663', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:04.064450', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.1776696890592575, 'timestamp': '2025-09-30 22:18:04.076736', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.139306', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.130095973610878, 'timestamp': '2025-09-30 22:18:04.144496', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.213549', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.12590359151363373, 'timestamp': '2025-09-30 22:18:04.227807', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:04.285687', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.0830693170428276, 'timestamp': '2025-09-30 22:18:04.291879', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:04.349422', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.18406778573989868, 'timestamp': '2025-09-30 22:18:04.357345', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.414618', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.19266414642333984, 'timestamp': '2025-09-30 22:18:04.423340', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.481470', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.19611869752407074, 'timestamp': '2025-09-30 22:18:04.485258', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.544467', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.1273360252380371, 'timestamp': '2025-09-30 22:18:04.546994', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:04.605017', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.15726518630981445, 'timestamp': '2025-09-30 22:18:04.612626', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:04.683936', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.10711425542831421, 'timestamp': '2025-09-30 22:18:04.696035', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:04.754194', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.229974165558815, 'timestamp': '2025-09-30 22:18:04.765920', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:04.823594', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.17598994076251984, 'timestamp': '2025-09-30 22:18:04.827102', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:04.884135', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.10271517187356949, 'timestamp': '2025-09-30 22:18:04.904340', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:04.969051', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.15689317882061005, 'timestamp': '2025-09-30 22:18:04.971731', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:05.028075', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.1044655442237854, 'timestamp': '2025-09-30 22:18:05.036894', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:05.093767', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.173439621925354, 'timestamp': '2025-09-30 22:18:05.096587', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:05.153584', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.18198639154434204, 'timestamp': '2025-09-30 22:18:05.163895', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:05.219944', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.14653299748897552, 'timestamp': '2025-09-30 22:18:05.223025', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:05.279645', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.10124056786298752, 'timestamp': '2025-09-30 22:18:05.283435', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:05.341883', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.1292620748281479, 'timestamp': '2025-09-30 22:18:05.346002', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:05.403158', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.15514597296714783, 'timestamp': '2025-09-30 22:18:05.409784', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:05.479013', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.2964792251586914, 'timestamp': '2025-09-30 22:18:05.483351', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:05.550485', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.1606808453798294, 'timestamp': '2025-09-30 22:18:05.554327', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:05.622421', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.22774334251880646, 'timestamp': '2025-09-30 22:18:05.635322', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:05.698804', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.12266547977924347, 'timestamp': '2025-09-30 22:18:05.711123', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-30 22:18:06.115087', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:06.176224', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.14403891563415527, 'timestamp': '2025-09-30 22:18:06.186932', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:06.250956', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.1052522212266922, 'timestamp': '2025-09-30 22:18:06.254356', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:06.329879', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.2684444785118103, 'timestamp': '2025-09-30 22:18:06.342181', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:06.399827', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.1834048181772232, 'timestamp': '2025-09-30 22:18:06.407922', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:06.470874', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.27970582246780396, 'timestamp': '2025-09-30 22:18:06.473685', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:06.532183', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.11016052961349487, 'timestamp': '2025-09-30 22:18:06.535352', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:06.593594', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.2313866764307022, 'timestamp': '2025-09-30 22:18:06.596621', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:06.661485', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.20300725102424622, 'timestamp': '2025-09-30 22:18:06.670280', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:06.729068', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.1585846096277237, 'timestamp': '2025-09-30 22:18:06.733405', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:06.797811', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.1823497861623764, 'timestamp': '2025-09-30 22:18:06.813281', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:06.870396', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.16684548556804657, 'timestamp': '2025-09-30 22:18:06.874206', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:06.938508', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.2291230857372284, 'timestamp': '2025-09-30 22:18:06.944934', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:07.001286', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.19134877622127533, 'timestamp': '2025-09-30 22:18:07.009885', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.067397', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.245871901512146, 'timestamp': '2025-09-30 22:18:07.070878', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.130755', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.1473238617181778, 'timestamp': '2025-09-30 22:18:07.135390', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.193738', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.15207195281982422, 'timestamp': '2025-09-30 22:18:07.202499', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:07.260321', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.13401703536510468, 'timestamp': '2025-09-30 22:18:07.264355', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.328505', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.18249565362930298, 'timestamp': '2025-09-30 22:18:07.331683', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.388950', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.14419753849506378, 'timestamp': '2025-09-30 22:18:07.393011', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:07.450253', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.156056746840477, 'timestamp': '2025-09-30 22:18:07.461758', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:07.520777', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.19176599383354187, 'timestamp': '2025-09-30 22:18:07.524572', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:07.589457', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.18331246078014374, 'timestamp': '2025-09-30 22:18:07.592945', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:07.650207', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.1142784059047699, 'timestamp': '2025-09-30 22:18:07.652938', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:07.715287', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.15835313498973846, 'timestamp': '2025-09-30 22:18:07.735925', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:07.791939', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.13206104934215546, 'timestamp': '2025-09-30 22:18:07.795564', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:07.852883', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.16458769142627716, 'timestamp': '2025-09-30 22:18:07.856296', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:07.913139', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.15207445621490479, 'timestamp': '2025-09-30 22:18:07.917926', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:07.979416', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.13417932391166687, 'timestamp': '2025-09-30 22:18:07.990644', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:08.053350', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.19314683973789215, 'timestamp': '2025-09-30 22:18:08.064315', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:08.130449', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.15873080492019653, 'timestamp': '2025-09-30 22:18:08.134386', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:08.199068', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.15949282050132751, 'timestamp': '2025-09-30 22:18:08.202483', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:08.259103', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.15478788316249847, 'timestamp': '2025-09-30 22:18:08.266096', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:08.323836', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.1522301435470581, 'timestamp': '2025-09-30 22:18:08.333194', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:08.391115', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.35494425892829895, 'timestamp': '2025-09-30 22:18:08.394361', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:08.452593', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.1702536791563034, 'timestamp': '2025-09-30 22:18:08.464539', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:08.529882', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.17261932790279388, 'timestamp': '2025-09-30 22:18:08.537742', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:08.595174', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.19223442673683167, 'timestamp': '2025-09-30 22:18:08.598700', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:08.655792', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.19891847670078278, 'timestamp': '2025-09-30 22:18:08.661006', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:08.719908', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.14143961668014526, 'timestamp': '2025-09-30 22:18:08.728630', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:08.791219', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.18155212700366974, 'timestamp': '2025-09-30 22:18:08.797604', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:08.858294', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.203171968460083, 'timestamp': '2025-09-30 22:18:08.867128', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:18:08.924671', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.12107975780963898, 'timestamp': '2025-09-30 22:18:08.936157', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.008880', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.24901056289672852, 'timestamp': '2025-09-30 22:18:09.012954', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.071059', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.10276991128921509, 'timestamp': '2025-09-30 22:18:09.087431', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.144565', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.2675677537918091, 'timestamp': '2025-09-30 22:18:09.148495', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.218054', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09822426736354828, 'timestamp': '2025-09-30 22:18:09.229375', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.294207', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.136981800198555, 'timestamp': '2025-09-30 22:18:09.298516', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.361971', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.1947336196899414, 'timestamp': '2025-09-30 22:18:09.368954', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:09.436738', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.1513276994228363, 'timestamp': '2025-09-30 22:18:09.440849', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:09.497808', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.17256313562393188, 'timestamp': '2025-09-30 22:18:09.501357', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:09.559938', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.061759158968925476, 'timestamp': '2025-09-30 22:18:09.567040', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:09.632059', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.14381228387355804, 'timestamp': '2025-09-30 22:18:09.646536', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:09.704332', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.12250745296478271, 'timestamp': '2025-09-30 22:18:09.713339', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:09.771662', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.145951509475708, 'timestamp': '2025-09-30 22:18:09.778204', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.842743', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.17978999018669128, 'timestamp': '2025-09-30 22:18:09.853163', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:09.914815', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.09166879206895828, 'timestamp': '2025-09-30 22:18:09.926828', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:09.984259', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.15268713235855103, 'timestamp': '2025-09-30 22:18:09.988248', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:10.049104', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.10213903337717056, 'timestamp': '2025-09-30 22:18:10.053544', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:10.112904', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.11949214339256287, 'timestamp': '2025-09-30 22:18:10.117474', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.175832', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.159677192568779, 'timestamp': '2025-09-30 22:18:10.183017', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.240047', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.12350746244192123, 'timestamp': '2025-09-30 22:18:10.243139', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.300612', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.14792953431606293, 'timestamp': '2025-09-30 22:18:10.316259', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.378247', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.12938116490840912, 'timestamp': '2025-09-30 22:18:10.381320', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.438926', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.16465024650096893, 'timestamp': '2025-09-30 22:18:10.445453', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.507707', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.13528446853160858, 'timestamp': '2025-09-30 22:18:10.514116', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.574328', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.0947519838809967, 'timestamp': '2025-09-30 22:18:10.579977', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.641125', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.2070067971944809, 'timestamp': '2025-09-30 22:18:10.646984', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.706085', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.13394872844219208, 'timestamp': '2025-09-30 22:18:10.715920', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.777758', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.19623695313930511, 'timestamp': '2025-09-30 22:18:10.787135', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.858261', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.23035608232021332, 'timestamp': '2025-09-30 22:18:10.860991', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:10.918108', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.16661493480205536, 'timestamp': '2025-09-30 22:18:10.920900', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:10.978861', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.17222918570041656, 'timestamp': '2025-09-30 22:18:10.995272', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:11.051405', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.19004522264003754, 'timestamp': '2025-09-30 22:18:11.057799', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:11.115040', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.10062229633331299, 'timestamp': '2025-09-30 22:18:11.118444', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.175534', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.16713613271713257, 'timestamp': '2025-09-30 22:18:11.177982', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.234892', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.19172069430351257, 'timestamp': '2025-09-30 22:18:11.242000', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:11.299275', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.18962204456329346, 'timestamp': '2025-09-30 22:18:11.304151', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:11.373003', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.1760389357805252, 'timestamp': '2025-09-30 22:18:11.379041', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:11.436607', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.09254103153944016, 'timestamp': '2025-09-30 22:18:11.446721', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.503151', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.13639411330223083, 'timestamp': '2025-09-30 22:18:11.510597', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:11.566113', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.1615467071533203, 'timestamp': '2025-09-30 22:18:11.569128', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:11.634092', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.16180147230625153, 'timestamp': '2025-09-30 22:18:11.636892', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:11.693530', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.2643715739250183, 'timestamp': '2025-09-30 22:18:11.696330', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:11.753413', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.1063041239976883, 'timestamp': '2025-09-30 22:18:11.759980', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.816433', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.20609819889068604, 'timestamp': '2025-09-30 22:18:11.819588', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.877189', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.13361486792564392, 'timestamp': '2025-09-30 22:18:11.881568', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:11.945562', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.1103307232260704, 'timestamp': '2025-09-30 22:18:11.956395', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:12.014280', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.20640255510807037, 'timestamp': '2025-09-30 22:18:12.023033', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:12.082192', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.11809088289737701, 'timestamp': '2025-09-30 22:18:12.085868', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.147849', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.1182367354631424, 'timestamp': '2025-09-30 22:18:12.150843', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:12.218405', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.13148923218250275, 'timestamp': '2025-09-30 22:18:12.223146', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:18:12.281246', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.19944001734256744, 'timestamp': '2025-09-30 22:18:12.288480', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:12.348385', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.1669916808605194, 'timestamp': '2025-09-30 22:18:12.351736', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.417450', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.23751850426197052, 'timestamp': '2025-09-30 22:18:12.424703', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:12.482167', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.16148357093334198, 'timestamp': '2025-09-30 22:18:12.484816', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.541046', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11087093502283096, 'timestamp': '2025-09-30 22:18:12.547808', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.603436', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.1914428472518921, 'timestamp': '2025-09-30 22:18:12.606741', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.667704', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.0821533352136612, 'timestamp': '2025-09-30 22:18:12.684747', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:12.756981', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.14461174607276917, 'timestamp': '2025-09-30 22:18:12.761199', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:12.819474', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.18330590426921844, 'timestamp': '2025-09-30 22:18:12.828988', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:12.885956', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.19436970353126526, 'timestamp': '2025-09-30 22:18:12.892530', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:12.951722', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.1374412328004837, 'timestamp': '2025-09-30 22:18:12.956520', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:13.028320', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.19722554087638855, 'timestamp': '2025-09-30 22:18:13.034544', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:13.111324', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.12954778969287872, 'timestamp': '2025-09-30 22:18:13.122421', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.179965', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.14243917167186737, 'timestamp': '2025-09-30 22:18:13.183771', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.243048', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.15024393796920776, 'timestamp': '2025-09-30 22:18:13.247786', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:13.310296', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12023995816707611, 'timestamp': '2025-09-30 22:18:13.316283', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.386891', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.15089532732963562, 'timestamp': '2025-09-30 22:18:13.395251', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.453210', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.14880332350730896, 'timestamp': '2025-09-30 22:18:13.466698', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:13.525720', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.1602432280778885, 'timestamp': '2025-09-30 22:18:13.529878', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.588248', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.15980219841003418, 'timestamp': '2025-09-30 22:18:13.604573', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.662157', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.09935227781534195, 'timestamp': '2025-09-30 22:18:13.668817', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.726621', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.2874838709831238, 'timestamp': '2025-09-30 22:18:13.730210', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:13.799614', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.1517157405614853, 'timestamp': '2025-09-30 22:18:13.813437', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:13.872658', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.13358919322490692, 'timestamp': '2025-09-30 22:18:13.876336', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:13.937403', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.17053358256816864, 'timestamp': '2025-09-30 22:18:13.946610', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:14.005200', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.127986341714859, 'timestamp': '2025-09-30 22:18:14.020738', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:14.079583', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.10218523442745209, 'timestamp': '2025-09-30 22:18:14.087344', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:14.146370', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.16259467601776123, 'timestamp': '2025-09-30 22:18:14.151303', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:14.218794', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.24398306012153625, 'timestamp': '2025-09-30 22:18:14.227354', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:14.284520', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.15434382855892181, 'timestamp': '2025-09-30 22:18:14.288087', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:14.358559', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.13037730753421783, 'timestamp': '2025-09-30 22:18:14.362838', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:14.421347', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.14037372171878815, 'timestamp': '2025-09-30 22:18:14.425532', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:14.492729', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.16449400782585144, 'timestamp': '2025-09-30 22:18:14.500301', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:14.558159', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.418048232793808, 'timestamp': '2025-09-30 22:18:14.562471', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:14.620695', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.220994234085083, 'timestamp': '2025-09-30 22:18:14.624572', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:14.681603', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.20923440158367157, 'timestamp': '2025-09-30 22:18:14.685392', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:14.755393', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.08441805839538574, 'timestamp': '2025-09-30 22:18:14.762497', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:14.832293', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.1978980451822281, 'timestamp': '2025-09-30 22:18:14.836174', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:14.905313', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.20479314029216766, 'timestamp': '2025-09-30 22:18:14.909586', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:14.966870', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.12324487417936325, 'timestamp': '2025-09-30 22:18:14.971726', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.041246', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.16388140618801117, 'timestamp': '2025-09-30 22:18:15.050615', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:15.117536', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.18286435306072235, 'timestamp': '2025-09-30 22:18:15.121958', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.180537', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.14388041198253632, 'timestamp': '2025-09-30 22:18:15.183946', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:15.242123', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.16563083231449127, 'timestamp': '2025-09-30 22:18:15.256662', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:15.328753', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.1328554004430771, 'timestamp': '2025-09-30 22:18:15.338220', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:15.395185', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.21379657089710236, 'timestamp': '2025-09-30 22:18:15.409176', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.466463', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.1757076382637024, 'timestamp': '2025-09-30 22:18:15.483318', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.551233', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.26115748286247253, 'timestamp': '2025-09-30 22:18:15.554927', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:15.625342', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.22606158256530762, 'timestamp': '2025-09-30 22:18:15.644275', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.701597', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.15549436211585999, 'timestamp': '2025-09-30 22:18:15.705416', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:15.776398', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.1977136880159378, 'timestamp': '2025-09-30 22:18:15.792423', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:15.859386', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.07739345729351044, 'timestamp': '2025-09-30 22:18:15.864234', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:15.932512', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.21996049582958221, 'timestamp': '2025-09-30 22:18:15.942105', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:15.999780', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.159309521317482, 'timestamp': '2025-09-30 22:18:16.005093', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:16.065860', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.1942615807056427, 'timestamp': '2025-09-30 22:18:16.070977', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:16.130881', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.17007312178611755, 'timestamp': '2025-09-30 22:18:16.135171', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:16.194327', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.1410946547985077, 'timestamp': '2025-09-30 22:18:16.201610', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:16.258093', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.2119446098804474, 'timestamp': '2025-09-30 22:18:16.274505', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:16.354856', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.17974324524402618, 'timestamp': '2025-09-30 22:18:16.359746', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:16.418118', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.27847355604171753, 'timestamp': '2025-09-30 22:18:16.422139', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:16.499459', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.11135100573301315, 'timestamp': '2025-09-30 22:18:16.506397', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:16.563588', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.18594461679458618, 'timestamp': '2025-09-30 22:18:16.568994', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:16.625925', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.24769903719425201, 'timestamp': '2025-09-30 22:18:16.630669', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:16.689117', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.16959407925605774, 'timestamp': '2025-09-30 22:18:16.696423', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:16.755211', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.13750940561294556, 'timestamp': '2025-09-30 22:18:16.763637', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:16.823346', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.2182423174381256, 'timestamp': '2025-09-30 22:18:16.829092', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:16.899505', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.206537663936615, 'timestamp': '2025-09-30 22:18:16.904919', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:16.962881', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.15613770484924316, 'timestamp': '2025-09-30 22:18:16.970644', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:17.033139', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.24265208840370178, 'timestamp': '2025-09-30 22:18:17.044672', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:18:30.708209', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 11498.272269053012, 'timestamp': '2025-09-30 22:18:30.714833', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:30.774177', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.14124128222465515, 'timestamp': '2025-09-30 22:18:30.779180', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:30.839125', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.17441853880882263, 'timestamp': '2025-09-30 22:18:30.844198', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:30.905113', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.17671705782413483, 'timestamp': '2025-09-30 22:18:30.919632', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:30.976572', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.13942596316337585, 'timestamp': '2025-09-30 22:18:30.985209', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.060761', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.09333710372447968, 'timestamp': '2025-09-30 22:18:31.064537', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.121745', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.2291484922170639, 'timestamp': '2025-09-30 22:18:31.126226', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:31.186313', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.15281176567077637, 'timestamp': '2025-09-30 22:18:31.190282', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.248166', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.24482937157154083, 'timestamp': '2025-09-30 22:18:31.259208', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:31.319288', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.1467686891555786, 'timestamp': '2025-09-30 22:18:31.327507', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:31.387470', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.2295619249343872, 'timestamp': '2025-09-30 22:18:31.400867', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.464161', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.20079530775547028, 'timestamp': '2025-09-30 22:18:31.469100', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.531636', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.18433214724063873, 'timestamp': '2025-09-30 22:18:31.541008', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:31.598407', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.15204501152038574, 'timestamp': '2025-09-30 22:18:31.603779', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:31.672199', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.15770256519317627, 'timestamp': '2025-09-30 22:18:31.688863', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:31.747837', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.18103332817554474, 'timestamp': '2025-09-30 22:18:31.751837', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.810376', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.204158753156662, 'timestamp': '2025-09-30 22:18:31.817136', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:31.877602', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.2328817993402481, 'timestamp': '2025-09-30 22:18:31.883788', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:31.943098', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.14127607643604279, 'timestamp': '2025-09-30 22:18:31.953385', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:32.012140', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.1921394318342209, 'timestamp': '2025-09-30 22:18:32.019466', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:32.078704', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.19961300492286682, 'timestamp': '2025-09-30 22:18:32.098850', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:32.160788', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.12706387042999268, 'timestamp': '2025-09-30 22:18:32.177956', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:32.239660', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.19041943550109863, 'timestamp': '2025-09-30 22:18:32.243768', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:32.314092', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.12051056325435638, 'timestamp': '2025-09-30 22:18:32.320141', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:32.379327', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.230991393327713, 'timestamp': '2025-09-30 22:18:32.387722', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:32.445508', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.16778725385665894, 'timestamp': '2025-09-30 22:18:32.450446', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:32.509527', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.18255051970481873, 'timestamp': '2025-09-30 22:18:32.513579', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:32.573112', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.14579729735851288, 'timestamp': '2025-09-30 22:18:32.577882', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:32.635979', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.1249983012676239, 'timestamp': '2025-09-30 22:18:32.656947', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:32.714830', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.05981799215078354, 'timestamp': '2025-09-30 22:18:32.731536', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:32.789611', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.10934300720691681, 'timestamp': '2025-09-30 22:18:32.807353', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:32.878186', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.19704744219779968, 'timestamp': '2025-09-30 22:18:32.882642', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:32.940031', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.10924114286899567, 'timestamp': '2025-09-30 22:18:32.948497', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:33.005694', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.1139911562204361, 'timestamp': '2025-09-30 22:18:33.009103', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:33.068082', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.1776130348443985, 'timestamp': '2025-09-30 22:18:33.086188', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:33.158287', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.1706678569316864, 'timestamp': '2025-09-30 22:18:33.168181', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:33.239965', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.1388772875070572, 'timestamp': '2025-09-30 22:18:33.248574', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:33.305581', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.09351471811532974, 'timestamp': '2025-09-30 22:18:33.310100', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:33.368415', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.2187844067811966, 'timestamp': '2025-09-30 22:18:33.373972', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:33.446439', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.15147686004638672, 'timestamp': '2025-09-30 22:18:33.452119', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:33.511220', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.24754153192043304, 'timestamp': '2025-09-30 22:18:33.519362', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:33.588157', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.15879084169864655, 'timestamp': '2025-09-30 22:18:33.593536', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:33.662635', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.126860573887825, 'timestamp': '2025-09-30 22:18:33.667113', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:33.725740', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.12130440771579742, 'timestamp': '2025-09-30 22:18:33.730823', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:33.788341', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.19006666541099548, 'timestamp': '2025-09-30 22:18:33.797429', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:33.867670', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.10555917769670486, 'timestamp': '2025-09-30 22:18:33.873257', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:33.931637', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.11978480219841003, 'timestamp': '2025-09-30 22:18:33.936054', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.002412', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.16867461800575256, 'timestamp': '2025-09-30 22:18:34.006766', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:34.065657', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.14399413764476776, 'timestamp': '2025-09-30 22:18:34.073314', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:34.138511', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.13575147092342377, 'timestamp': '2025-09-30 22:18:34.150523', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.224702', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.146866112947464, 'timestamp': '2025-09-30 22:18:34.228867', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:34.289471', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.17707303166389465, 'timestamp': '2025-09-30 22:18:34.293059', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.360523', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.15056820213794708, 'timestamp': '2025-09-30 22:18:34.379974', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.438608', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.1695948988199234, 'timestamp': '2025-09-30 22:18:34.443752', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:34.509588', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.3821333944797516, 'timestamp': '2025-09-30 22:18:34.514548', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:34.582839', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.22985979914665222, 'timestamp': '2025-09-30 22:18:34.588369', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.645268', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.173051118850708, 'timestamp': '2025-09-30 22:18:34.652814', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.712674', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.18608422577381134, 'timestamp': '2025-09-30 22:18:34.722216', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.783504', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.12957903742790222, 'timestamp': '2025-09-30 22:18:34.786875', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:34.844520', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.1810627430677414, 'timestamp': '2025-09-30 22:18:34.850933', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:34.914483', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.08198866993188858, 'timestamp': '2025-09-30 22:18:34.921913', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:34.988584', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.21712124347686768, 'timestamp': '2025-09-30 22:18:34.993000', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:35.058376', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.12160076946020126, 'timestamp': '2025-09-30 22:18:35.062319', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:35.122944', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.1802157461643219, 'timestamp': '2025-09-30 22:18:35.135829', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.201859', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.12897218763828278, 'timestamp': '2025-09-30 22:18:35.210121', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:35.267629', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.1289425790309906, 'timestamp': '2025-09-30 22:18:35.272438', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.331002', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.282785028219223, 'timestamp': '2025-09-30 22:18:35.338375', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:35.397179', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.21472829580307007, 'timestamp': '2025-09-30 22:18:35.401743', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:35.460285', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.23300297558307648, 'timestamp': '2025-09-30 22:18:35.468833', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.525987', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.07138291746377945, 'timestamp': '2025-09-30 22:18:35.531179', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.595246', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.13854551315307617, 'timestamp': '2025-09-30 22:18:35.601371', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.662871', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.17965282499790192, 'timestamp': '2025-09-30 22:18:35.670944', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:35.739136', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.1751447170972824, 'timestamp': '2025-09-30 22:18:35.747953', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:35.817475', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.15435776114463806, 'timestamp': '2025-09-30 22:18:35.830613', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:35.899418', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.17045871913433075, 'timestamp': '2025-09-30 22:18:35.912976', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:35.980161', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.2391449362039566, 'timestamp': '2025-09-30 22:18:35.983783', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.052147', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.15972451865673065, 'timestamp': '2025-09-30 22:18:36.060114', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.116888', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.12456932663917542, 'timestamp': '2025-09-30 22:18:36.121640', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:36.186560', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.17306630313396454, 'timestamp': '2025-09-30 22:18:36.191643', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.254560', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.18515074253082275, 'timestamp': '2025-09-30 22:18:36.259493', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:36.317953', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.1243419349193573, 'timestamp': '2025-09-30 22:18:36.326893', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:36.384794', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.2420545071363449, 'timestamp': '2025-09-30 22:18:36.390501', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.448758', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.11240977793931961, 'timestamp': '2025-09-30 22:18:36.464453', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.524250', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.16852222383022308, 'timestamp': '2025-09-30 22:18:36.528526', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:36.586584', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.10781645774841309, 'timestamp': '2025-09-30 22:18:36.593592', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:36.656501', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.08529923856258392, 'timestamp': '2025-09-30 22:18:36.661056', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.718703', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.2659773528575897, 'timestamp': '2025-09-30 22:18:36.722995', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.788166', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.2884729504585266, 'timestamp': '2025-09-30 22:18:36.795510', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:36.853256', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.0981898233294487, 'timestamp': '2025-09-30 22:18:36.861221', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:36.924725', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.20030993223190308, 'timestamp': '2025-09-30 22:18:36.928109', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:36.996852', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.13044454157352448, 'timestamp': '2025-09-30 22:18:37.001467', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:37.062838', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.22399288415908813, 'timestamp': '2025-09-30 22:18:37.068636', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:37.132732', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.16872592270374298, 'timestamp': '2025-09-30 22:18:37.147621', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.203885', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.2513616979122162, 'timestamp': '2025-09-30 22:18:37.207990', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.271502', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.18592646718025208, 'timestamp': '2025-09-30 22:18:37.276286', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.337237', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.1841639280319214, 'timestamp': '2025-09-30 22:18:37.342007', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.404714', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.18399618566036224, 'timestamp': '2025-09-30 22:18:37.423617', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.486199', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.14737947285175323, 'timestamp': '2025-09-30 22:18:37.491102', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:37.558017', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.11704713851213455, 'timestamp': '2025-09-30 22:18:37.573703', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:37.641502', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.1747056096792221, 'timestamp': '2025-09-30 22:18:37.645275', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:37.702980', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.17061594128608704, 'timestamp': '2025-09-30 22:18:37.710158', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:37.777349', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.221471905708313, 'timestamp': '2025-09-30 22:18:37.782898', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:37.843832', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.09258251637220383, 'timestamp': '2025-09-30 22:18:37.847538', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:37.918189', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.06164174899458885, 'timestamp': '2025-09-30 22:18:37.924976', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:37.982756', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.14191146194934845, 'timestamp': '2025-09-30 22:18:37.990586', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.061741', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.15507400035858154, 'timestamp': '2025-09-30 22:18:38.065746', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:38.122746', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.3111228942871094, 'timestamp': '2025-09-30 22:18:38.127372', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.185650', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.2654920816421509, 'timestamp': '2025-09-30 22:18:38.194084', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:38.253203', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.22865840792655945, 'timestamp': '2025-09-30 22:18:38.261184', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:38.317195', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.1361595094203949, 'timestamp': '2025-09-30 22:18:38.322062', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:38.379451', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.2065819501876831, 'timestamp': '2025-09-30 22:18:38.385216', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.443987', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.12089382112026215, 'timestamp': '2025-09-30 22:18:38.448319', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.506038', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.11950293928384781, 'timestamp': '2025-09-30 22:18:38.514055', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.570677', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.19597592949867249, 'timestamp': '2025-09-30 22:18:38.574875', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:38.632972', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.1880885511636734, 'timestamp': '2025-09-30 22:18:38.637368', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:38.704900', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.19032934308052063, 'timestamp': '2025-09-30 22:18:38.709236', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.772663', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.08921688795089722, 'timestamp': '2025-09-30 22:18:38.782132', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:38.853213', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.12381835281848907, 'timestamp': '2025-09-30 22:18:38.857714', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:38.921172', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.1304762065410614, 'timestamp': '2025-09-30 22:18:38.924556', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:38.982083', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.1512245088815689, 'timestamp': '2025-09-30 22:18:38.986225', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:39.051037', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.17421524226665497, 'timestamp': '2025-09-30 22:18:39.059087', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:39.118033', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.2669237554073334, 'timestamp': '2025-09-30 22:18:39.124287', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:39.182291', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.2366577535867691, 'timestamp': '2025-09-30 22:18:39.187729', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:39.245914', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.11279722303152084, 'timestamp': '2025-09-30 22:18:39.250794', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.307988', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.23837772011756897, 'timestamp': '2025-09-30 22:18:39.317117', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:39.375601', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.12411447614431381, 'timestamp': '2025-09-30 22:18:39.380137', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.438096', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.17868132889270782, 'timestamp': '2025-09-30 22:18:39.455568', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.514100', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.13487453758716583, 'timestamp': '2025-09-30 22:18:39.517809', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.575925', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.12680919468402863, 'timestamp': '2025-09-30 22:18:39.583530', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:39.648506', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.13842900097370148, 'timestamp': '2025-09-30 22:18:39.663360', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.731969', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.16052338480949402, 'timestamp': '2025-09-30 22:18:39.743866', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.809977', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.15360383689403534, 'timestamp': '2025-09-30 22:18:39.824910', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:39.894854', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.24973531067371368, 'timestamp': '2025-09-30 22:18:39.902585', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:39.965473', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.28065964579582214, 'timestamp': '2025-09-30 22:18:39.977619', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.034945', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.16701683402061462, 'timestamp': '2025-09-30 22:18:40.040652', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:40.099225', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.19293799996376038, 'timestamp': '2025-09-30 22:18:40.102996', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.162158', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.12679733335971832, 'timestamp': '2025-09-30 22:18:40.170769', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:40.228475', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.17064844071865082, 'timestamp': '2025-09-30 22:18:40.232903', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.292112', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.14536868035793304, 'timestamp': '2025-09-30 22:18:40.296855', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:40.355819', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.16722719371318817, 'timestamp': '2025-09-30 22:18:40.360545', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:40.429090', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.16175620257854462, 'timestamp': '2025-09-30 22:18:40.436399', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:40.494655', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.13321849703788757, 'timestamp': '2025-09-30 22:18:40.498215', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.556005', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.1753770411014557, 'timestamp': '2025-09-30 22:18:40.559694', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.617253', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.1577938050031662, 'timestamp': '2025-09-30 22:18:40.620666', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:40.677073', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.1871134638786316, 'timestamp': '2025-09-30 22:18:40.684314', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:40.741495', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.2974744737148285, 'timestamp': '2025-09-30 22:18:40.744275', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:40.817623', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.1722557544708252, 'timestamp': '2025-09-30 22:18:40.821376', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:40.878949', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.08396375924348831, 'timestamp': '2025-09-30 22:18:40.882410', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:40.939902', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.16204023361206055, 'timestamp': '2025-09-30 22:18:40.946547', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:41.002296', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.1762600988149643, 'timestamp': '2025-09-30 22:18:41.017947', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:41.076420', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.17643916606903076, 'timestamp': '2025-09-30 22:18:41.080708', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.139360', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.12654569745063782, 'timestamp': '2025-09-30 22:18:41.146342', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:41.217603', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.17761094868183136, 'timestamp': '2025-09-30 22:18:41.240080', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:41.300101', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.2635818421840668, 'timestamp': '2025-09-30 22:18:41.307073', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.364795', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.23767951130867004, 'timestamp': '2025-09-30 22:18:41.369802', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.428132', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.11224204301834106, 'timestamp': '2025-09-30 22:18:41.433376', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:41.490631', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.17185820639133453, 'timestamp': '2025-09-30 22:18:41.498611', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:41.563833', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.16634851694107056, 'timestamp': '2025-09-30 22:18:41.578259', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.647180', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.13432584702968597, 'timestamp': '2025-09-30 22:18:41.651634', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.723122', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.0975155457854271, 'timestamp': '2025-09-30 22:18:41.727844', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:41.789413', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.15906791388988495, 'timestamp': '2025-09-30 22:18:41.799297', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:41.862877', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.1497376412153244, 'timestamp': '2025-09-30 22:18:41.868542', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:41.925555', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.18204817175865173, 'timestamp': '2025-09-30 22:18:41.929575', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:41.987998', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.1057397797703743, 'timestamp': '2025-09-30 22:18:41.995167', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:42.057483', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.11639241129159927, 'timestamp': '2025-09-30 22:18:42.070474', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:42.126562', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.09426271170377731, 'timestamp': '2025-09-30 22:18:42.139476', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:42.200461', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.10164592415094376, 'timestamp': '2025-09-30 22:18:42.204189', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:42.261968', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.09057166427373886, 'timestamp': '2025-09-30 22:18:42.266329', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:42.327839', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.12627144157886505, 'timestamp': '2025-09-30 22:18:42.335280', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:42.400525', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.1662701517343521, 'timestamp': '2025-09-30 22:18:42.412617', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:42.478126', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.161931112408638, 'timestamp': '2025-09-30 22:18:42.486350', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:42.542931', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.1470094621181488, 'timestamp': '2025-09-30 22:18:42.553019', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:42.627135', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.20265623927116394, 'timestamp': '2025-09-30 22:18:42.637466', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:42.694777', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.11750750988721848, 'timestamp': '2025-09-30 22:18:42.698372', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:42.755136', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.15858948230743408, 'timestamp': '2025-09-30 22:18:42.765037', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:42.822497', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.10836675763130188, 'timestamp': '2025-09-30 22:18:42.825756', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:42.905204', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.13029801845550537, 'timestamp': '2025-09-30 22:18:42.912084', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:42.977765', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.07449901103973389, 'timestamp': '2025-09-30 22:18:42.988772', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:43.051637', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.1941182166337967, 'timestamp': '2025-09-30 22:18:43.062883', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:43.128191', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.24357669055461884, 'timestamp': '2025-09-30 22:18:43.133635', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:43.193602', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.13535715639591217, 'timestamp': '2025-09-30 22:18:43.201049', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:43.264175', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.12880614399909973, 'timestamp': '2025-09-30 22:18:43.281093', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:43.338163', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.15291771292686462, 'timestamp': '2025-09-30 22:18:43.341982', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:43.407873', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.2367098480463028, 'timestamp': '2025-09-30 22:18:43.413331', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:43.472421', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.14443010091781616, 'timestamp': '2025-09-30 22:18:43.480082', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:43.549433', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.15278464555740356, 'timestamp': '2025-09-30 22:18:43.553330', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:43.612896', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.07835198938846588, 'timestamp': '2025-09-30 22:18:43.616680', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:43.676583', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.16004693508148193, 'timestamp': '2025-09-30 22:18:43.679948', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:43.740161', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.1561318337917328, 'timestamp': '2025-09-30 22:18:43.747963', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:43.806766', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.2791930139064789, 'timestamp': '2025-09-30 22:18:43.815691', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:43.878382', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.123909130692482, 'timestamp': '2025-09-30 22:18:43.881771', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:43.941924', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.12841738760471344, 'timestamp': '2025-09-30 22:18:43.946531', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:44.009003', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.20679856836795807, 'timestamp': '2025-09-30 22:18:44.016694', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:44.084678', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.13606302440166473, 'timestamp': '2025-09-30 22:18:44.091354', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:44.150742', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.15516574680805206, 'timestamp': '2025-09-30 22:18:44.155204', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:44.217355', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.17923122644424438, 'timestamp': '2025-09-30 22:18:44.234435', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:44.295890', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.15410254895687103, 'timestamp': '2025-09-30 22:18:44.305115', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:44.386180', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.08047410845756531, 'timestamp': '2025-09-30 22:18:44.392209', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:44.459847', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.34998857975006104, 'timestamp': '2025-09-30 22:18:44.464990', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:44.528664', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.21457353234291077, 'timestamp': '2025-09-30 22:18:44.537016', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:44.600613', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.11695219576358795, 'timestamp': '2025-09-30 22:18:44.608094', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:44.665591', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.24293237924575806, 'timestamp': '2025-09-30 22:18:44.673193', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:44.733101', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.14776282012462616, 'timestamp': '2025-09-30 22:18:44.743464', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:44.799839', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.2869906425476074, 'timestamp': '2025-09-30 22:18:44.803475', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:44.860241', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.10632495582103729, 'timestamp': '2025-09-30 22:18:44.867906', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:44.923988', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.136501282453537, 'timestamp': '2025-09-30 22:18:44.927656', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:18:44.987934', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.21984462440013885, 'timestamp': '2025-09-30 22:18:44.991541', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.075192', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.18190747499465942, 'timestamp': '2025-09-30 22:18:45.078590', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:45.139268', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.19340525567531586, 'timestamp': '2025-09-30 22:18:45.146081', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:45.203836', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.27346986532211304, 'timestamp': '2025-09-30 22:18:45.207431', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:45.269599', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.19534385204315186, 'timestamp': '2025-09-30 22:18:45.280620', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:45.343534', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.16869592666625977, 'timestamp': '2025-09-30 22:18:45.348023', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.407114', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.12784141302108765, 'timestamp': '2025-09-30 22:18:45.413931', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.470464', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.12764328718185425, 'timestamp': '2025-09-30 22:18:45.480806', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.537068', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.19715353846549988, 'timestamp': '2025-09-30 22:18:45.540459', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.597839', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.1637457311153412, 'timestamp': '2025-09-30 22:18:45.609635', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:45.667167', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.14465190470218658, 'timestamp': '2025-09-30 22:18:45.673845', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:45.735307', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.1886659413576126, 'timestamp': '2025-09-30 22:18:45.738134', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:45.794942', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.10580725967884064, 'timestamp': '2025-09-30 22:18:45.798740', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.863502', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.24619217216968536, 'timestamp': '2025-09-30 22:18:45.866451', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:45.928192', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.1449510008096695, 'timestamp': '2025-09-30 22:18:45.935842', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:46.000185', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.21319922804832458, 'timestamp': '2025-09-30 22:18:46.013398', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:46.070999', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.20385584235191345, 'timestamp': '2025-09-30 22:18:46.074408', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:46.132632', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.11741343140602112, 'timestamp': '2025-09-30 22:18:46.135628', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:46.199527', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.16170397400856018, 'timestamp': '2025-09-30 22:18:46.206154', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:46.267171', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.1364036649465561, 'timestamp': '2025-09-30 22:18:46.270642', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.328390', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.20034562051296234, 'timestamp': '2025-09-30 22:18:46.332874', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.391278', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.20642687380313873, 'timestamp': '2025-09-30 22:18:46.395519', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.453195', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.16502134501934052, 'timestamp': '2025-09-30 22:18:46.460230', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.517667', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.20922234654426575, 'timestamp': '2025-09-30 22:18:46.521646', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:46.578408', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.20576569437980652, 'timestamp': '2025-09-30 22:18:46.582383', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:46.638718', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.17175078392028809, 'timestamp': '2025-09-30 22:18:46.642742', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.700498', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.1155010536313057, 'timestamp': '2025-09-30 22:18:46.715959', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:46.774916', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.22811563313007355, 'timestamp': '2025-09-30 22:18:46.778656', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:46.846962', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.18982376158237457, 'timestamp': '2025-09-30 22:18:46.850115', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:46.914899', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.23618921637535095, 'timestamp': '2025-09-30 22:18:46.917734', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:46.976920', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.12239761650562286, 'timestamp': '2025-09-30 22:18:46.985574', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:47.051588', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.17053674161434174, 'timestamp': '2025-09-30 22:18:47.054642', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:47.114133', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.09514840692281723, 'timestamp': '2025-09-30 22:18:47.124934', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:47.202872', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.19901680946350098, 'timestamp': '2025-09-30 22:18:47.206708', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:47.273871', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.20208391547203064, 'timestamp': '2025-09-30 22:18:47.288636', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:47.346335', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.133289635181427, 'timestamp': '2025-09-30 22:18:47.351034', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:47.409869', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.17465057969093323, 'timestamp': '2025-09-30 22:18:47.413505', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:47.480897', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.1672670543193817, 'timestamp': '2025-09-30 22:18:47.493788', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:47.550583', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.1821376383304596, 'timestamp': '2025-09-30 22:18:47.566841', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:47.624670', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.16147303581237793, 'timestamp': '2025-09-30 22:18:47.628351', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:47.696173', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.15761218965053558, 'timestamp': '2025-09-30 22:18:47.701684', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:47.758698', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.13996285200119019, 'timestamp': '2025-09-30 22:18:47.763149', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:47.819532', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.16182689368724823, 'timestamp': '2025-09-30 22:18:47.827436', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:47.891896', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.1488860696554184, 'timestamp': '2025-09-30 22:18:47.896873', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:47.965327', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.09791137278079987, 'timestamp': '2025-09-30 22:18:47.968854', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.025978', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.2323639839887619, 'timestamp': '2025-09-30 22:18:48.030678', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.088837', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.19044792652130127, 'timestamp': '2025-09-30 22:18:48.096571', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:48.152748', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.14631573855876923, 'timestamp': '2025-09-30 22:18:48.160878', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.218065', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.16518564522266388, 'timestamp': '2025-09-30 22:18:48.223559', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:48.280768', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.18752379715442657, 'timestamp': '2025-09-30 22:18:48.294661', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.359048', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.1357685923576355, 'timestamp': '2025-09-30 22:18:48.366735', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.424595', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.16183209419250488, 'timestamp': '2025-09-30 22:18:48.428355', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.485133', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.11981689929962158, 'timestamp': '2025-09-30 22:18:48.489643', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:48.547209', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.14587123692035675, 'timestamp': '2025-09-30 22:18:48.552158', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.618014', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.13241997361183167, 'timestamp': '2025-09-30 22:18:48.634532', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:48.691529', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.16917970776557922, 'timestamp': '2025-09-30 22:18:48.706300', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:48.763603', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.11225305497646332, 'timestamp': '2025-09-30 22:18:48.767099', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:48.825203', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.1865386664867401, 'timestamp': '2025-09-30 22:18:48.829095', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:48.886055', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.12553925812244415, 'timestamp': '2025-09-30 22:18:48.894016', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:48.950798', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.13661403954029083, 'timestamp': '2025-09-30 22:18:48.954025', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:49.011289', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.15414656698703766, 'timestamp': '2025-09-30 22:18:49.014877', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:49.073032', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.27263084053993225, 'timestamp': '2025-09-30 22:18:49.076250', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:49.133956', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.1627204418182373, 'timestamp': '2025-09-30 22:18:49.141357', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:49.199098', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.20130231976509094, 'timestamp': '2025-09-30 22:18:49.210163', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:49.278848', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.14053061604499817, 'timestamp': '2025-09-30 22:18:49.287518', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:49.345544', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.13574370741844177, 'timestamp': '2025-09-30 22:18:49.348917', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:49.434944', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.1367940455675125, 'timestamp': '2025-09-30 22:18:49.442474', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:49.506101', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.09530215710401535, 'timestamp': '2025-09-30 22:18:49.510569', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:49.568872', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.1673227995634079, 'timestamp': '2025-09-30 22:18:49.572632', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:49.630442', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.15979307889938354, 'timestamp': '2025-09-30 22:18:49.633339', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:49.695528', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.20138287544250488, 'timestamp': '2025-09-30 22:18:49.702497', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:49.767952', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.17066366970539093, 'timestamp': '2025-09-30 22:18:49.771322', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:49.830401', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.1573067158460617, 'timestamp': '2025-09-30 22:18:49.841479', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:49.909367', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.17860828340053558, 'timestamp': '2025-09-30 22:18:49.913064', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:49.970708', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.11096605658531189, 'timestamp': '2025-09-30 22:18:49.983560', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:50.040160', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.12679456174373627, 'timestamp': '2025-09-30 22:18:50.044329', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:50.110611', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.23286353051662445, 'timestamp': '2025-09-30 22:18:50.113863', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:50.171897', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.12945343554019928, 'timestamp': '2025-09-30 22:18:50.175247', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:50.240048', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.26925355195999146, 'timestamp': '2025-09-30 22:18:50.247905', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:50.318972', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15732210874557495, 'timestamp': '2025-09-30 22:18:50.321810', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:50.378255', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.34073519706726074, 'timestamp': '2025-09-30 22:18:50.383438', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:50.440941', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.1985183209180832, 'timestamp': '2025-09-30 22:18:50.444260', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:50.501197', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.1733221560716629, 'timestamp': '2025-09-30 22:18:50.514110', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:50.572022', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.1839280128479004, 'timestamp': '2025-09-30 22:18:50.575587', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:50.637450', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.10348108410835266, 'timestamp': '2025-09-30 22:18:50.641107', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:50.699100', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.14592623710632324, 'timestamp': '2025-09-30 22:18:50.702971', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:50.760309', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.16248196363449097, 'timestamp': '2025-09-30 22:18:50.767548', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:50.825133', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.147232785820961, 'timestamp': '2025-09-30 22:18:50.828349', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:50.886333', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.13850483298301697, 'timestamp': '2025-09-30 22:18:50.890546', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:50.948157', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.15330256521701813, 'timestamp': '2025-09-30 22:18:50.952407', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:51.011208', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.25930070877075195, 'timestamp': '2025-09-30 22:18:51.017439', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:51.074456', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.11733151227235794, 'timestamp': '2025-09-30 22:18:51.077565', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.135902', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.16816353797912598, 'timestamp': '2025-09-30 22:18:51.145594', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:51.204208', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.1965307742357254, 'timestamp': '2025-09-30 22:18:51.208556', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:51.266225', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.14510734379291534, 'timestamp': '2025-09-30 22:18:51.272402', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:51.330711', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.13929042220115662, 'timestamp': '2025-09-30 22:18:51.333653', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:51.392515', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.18815413117408752, 'timestamp': '2025-09-30 22:18:51.395988', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.453457', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.13567888736724854, 'timestamp': '2025-09-30 22:18:51.457570', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:51.514288', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.11774811893701553, 'timestamp': '2025-09-30 22:18:51.521371', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:51.579112', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.11331597715616226, 'timestamp': '2025-09-30 22:18:51.584337', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.641479', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.19731464982032776, 'timestamp': '2025-09-30 22:18:51.645248', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:51.708699', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.17817352712154388, 'timestamp': '2025-09-30 22:18:51.711940', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.769909', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.08541633188724518, 'timestamp': '2025-09-30 22:18:51.777468', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:51.835658', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.23781630396842957, 'timestamp': '2025-09-30 22:18:51.844585', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.902383', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.16555416584014893, 'timestamp': '2025-09-30 22:18:51.914141', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:51.973196', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.19258494675159454, 'timestamp': '2025-09-30 22:18:51.976874', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:52.034356', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.09825915098190308, 'timestamp': '2025-09-30 22:18:52.042286', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:52.099916', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.1629105806350708, 'timestamp': '2025-09-30 22:18:52.103880', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:52.163164', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.22373229265213013, 'timestamp': '2025-09-30 22:18:52.166638', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:52.225375', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.11466576159000397, 'timestamp': '2025-09-30 22:18:52.229926', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:52.302998', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.09939718246459961, 'timestamp': '2025-09-30 22:18:52.313094', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:52.369510', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.17593227326869965, 'timestamp': '2025-09-30 22:18:52.373334', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:52.434430', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.15429715812206268, 'timestamp': '2025-09-30 22:18:52.437902', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:52.505968', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.27733373641967773, 'timestamp': '2025-09-30 22:18:52.516540', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:52.575330', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.1324373483657837, 'timestamp': '2025-09-30 22:18:52.581841', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:52.637993', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.15121497213840485, 'timestamp': '2025-09-30 22:18:52.641864', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:52.712198', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.1210952177643776, 'timestamp': '2025-09-30 22:18:52.716479', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:52.788774', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.1071668341755867, 'timestamp': '2025-09-30 22:18:52.794907', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:52.854725', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.20269209146499634, 'timestamp': '2025-09-30 22:18:52.862748', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:52.921514', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.24069492518901825, 'timestamp': '2025-09-30 22:18:52.927958', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:53.000452', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.18297837674617767, 'timestamp': '2025-09-30 22:18:53.005542', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:53.063540', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.16223879158496857, 'timestamp': '2025-09-30 22:18:53.067771', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:53.131469', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.11948519945144653, 'timestamp': '2025-09-30 22:18:53.142868', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.206777', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.0974273830652237, 'timestamp': '2025-09-30 22:18:53.209946', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.272983', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.13693436980247498, 'timestamp': '2025-09-30 22:18:53.277819', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.334460', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.22447146475315094, 'timestamp': '2025-09-30 22:18:53.338745', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:53.395957', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.21000422537326813, 'timestamp': '2025-09-30 22:18:53.403486', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.466802', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.14811943471431732, 'timestamp': '2025-09-30 22:18:53.470713', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:53.542588', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.2528250217437744, 'timestamp': '2025-09-30 22:18:53.548641', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:53.611990', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.1520313322544098, 'timestamp': '2025-09-30 22:18:53.616250', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:53.672895', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.14581668376922607, 'timestamp': '2025-09-30 22:18:53.679134', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.735627', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.1558157503604889, 'timestamp': '2025-09-30 22:18:53.739641', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:53.809301', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.16342037916183472, 'timestamp': '2025-09-30 22:18:53.813909', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:53.879617', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.23842135071754456, 'timestamp': '2025-09-30 22:18:53.883791', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:18:53.941179', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.13224391639232635, 'timestamp': '2025-09-30 22:18:53.948318', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-30 22:18:54.352551', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:54.409140', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.13129107654094696, 'timestamp': '2025-09-30 22:18:54.416186', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:54.475427', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.12939631938934326, 'timestamp': '2025-09-30 22:18:54.478613', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:54.536379', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.15841080248355865, 'timestamp': '2025-09-30 22:18:54.539455', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:54.601142', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.14017240703105927, 'timestamp': '2025-09-30 22:18:54.613841', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:54.678704', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.19041147828102112, 'timestamp': '2025-09-30 22:18:54.681953', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:54.747529', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.19023005664348602, 'timestamp': '2025-09-30 22:18:54.764259', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:54.822737', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.16002854704856873, 'timestamp': '2025-09-30 22:18:54.826365', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:54.883211', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.17483215034008026, 'timestamp': '2025-09-30 22:18:54.889678', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:54.946588', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.20134298503398895, 'timestamp': '2025-09-30 22:18:54.951038', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:55.008687', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.16787885129451752, 'timestamp': '2025-09-30 22:18:55.012208', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:55.076454', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.20701374113559723, 'timestamp': '2025-09-30 22:18:55.080276', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:55.146221', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.16661979258060455, 'timestamp': '2025-09-30 22:18:55.153107', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:55.211211', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.16796493530273438, 'timestamp': '2025-09-30 22:18:55.214964', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:55.279526', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.15229012072086334, 'timestamp': '2025-09-30 22:18:55.283020', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:55.341483', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.11340605467557907, 'timestamp': '2025-09-30 22:18:55.345089', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:55.402568', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.13753873109817505, 'timestamp': '2025-09-30 22:18:55.409438', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:55.465939', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.22625961899757385, 'timestamp': '2025-09-30 22:18:55.475038', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:55.539396', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.19305706024169922, 'timestamp': '2025-09-30 22:18:55.550114', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:55.616855', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.1307191252708435, 'timestamp': '2025-09-30 22:18:55.621363', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:55.680953', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.1937514692544937, 'timestamp': '2025-09-30 22:18:55.688047', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:55.752256', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.09834884852170944, 'timestamp': '2025-09-30 22:18:55.755088', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:55.813856', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.15684311091899872, 'timestamp': '2025-09-30 22:18:55.817114', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:55.874663', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.10085020214319229, 'timestamp': '2025-09-30 22:18:55.881276', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:55.946319', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.1487245410680771, 'timestamp': '2025-09-30 22:18:55.952867', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:56.009688', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.12766332924365997, 'timestamp': '2025-09-30 22:18:56.012600', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:56.069855', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.21756376326084137, 'timestamp': '2025-09-30 22:18:56.078924', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:56.137752', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.23133812844753265, 'timestamp': '2025-09-30 22:18:56.147101', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:56.205195', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.2234552651643753, 'timestamp': '2025-09-30 22:18:56.217925', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:56.275079', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.1691931188106537, 'timestamp': '2025-09-30 22:18:56.278264', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:56.342802', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.15300685167312622, 'timestamp': '2025-09-30 22:18:56.347767', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:56.411986', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.13705971837043762, 'timestamp': '2025-09-30 22:18:56.418772', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:56.476104', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.1377744823694229, 'timestamp': '2025-09-30 22:18:56.483195', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:56.540427', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.1842861920595169, 'timestamp': '2025-09-30 22:18:56.543632', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:56.603131', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.16520164906978607, 'timestamp': '2025-09-30 22:18:56.607006', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:56.664986', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.11132912337779999, 'timestamp': '2025-09-30 22:18:56.675484', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:56.732697', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.26810625195503235, 'timestamp': '2025-09-30 22:18:56.748146', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:56.821818', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.13823312520980835, 'timestamp': '2025-09-30 22:18:56.832340', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:56.898047', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.19597840309143066, 'timestamp': '2025-09-30 22:18:56.902548', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:56.960218', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.27396947145462036, 'timestamp': '2025-09-30 22:18:56.971988', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.038325', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.1917111575603485, 'timestamp': '2025-09-30 22:18:57.045447', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.102289', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.17093324661254883, 'timestamp': '2025-09-30 22:18:57.113845', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:57.177854', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.15472657978534698, 'timestamp': '2025-09-30 22:18:57.180514', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.237406', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.22691844403743744, 'timestamp': '2025-09-30 22:18:57.239876', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.304367', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.20316551625728607, 'timestamp': '2025-09-30 22:18:57.311679', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:57.380935', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.2341242879629135, 'timestamp': '2025-09-30 22:18:57.384024', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:57.441399', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.12574061751365662, 'timestamp': '2025-09-30 22:18:57.446038', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:57.503527', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.2050175666809082, 'timestamp': '2025-09-30 22:18:57.506029', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:57.562732', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.14356230199337006, 'timestamp': '2025-09-30 22:18:57.570956', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:18:57.630256', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.1222757026553154, 'timestamp': '2025-09-30 22:18:57.635865', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:57.693046', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.15073242783546448, 'timestamp': '2025-09-30 22:18:57.700046', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.761421', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.20503570139408112, 'timestamp': '2025-09-30 22:18:57.764845', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.846468', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.18195118010044098, 'timestamp': '2025-09-30 22:18:57.858049', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:57.927439', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.16241487860679626, 'timestamp': '2025-09-30 22:18:57.930288', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:57.989899', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.1424795538187027, 'timestamp': '2025-09-30 22:18:57.992627', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:58.055040', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.1985519826412201, 'timestamp': '2025-09-30 22:18:58.058065', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.115136', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.10314634442329407, 'timestamp': '2025-09-30 22:18:58.123363', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:18:58.180316', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3421138525009155, 'timestamp': '2025-09-30 22:18:58.183044', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.246380', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.13071021437644958, 'timestamp': '2025-09-30 22:18:58.250960', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:58.308224', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.1513809710741043, 'timestamp': '2025-09-30 22:18:58.314225', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:58.373906', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.0829068049788475, 'timestamp': '2025-09-30 22:18:58.380679', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:58.441740', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.09223205596208572, 'timestamp': '2025-09-30 22:18:58.447811', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.507626', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.15814433991909027, 'timestamp': '2025-09-30 22:18:58.512299', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.570172', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.14359746873378754, 'timestamp': '2025-09-30 22:18:58.574752', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.633558', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.2534565329551697, 'timestamp': '2025-09-30 22:18:58.642884', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.700443', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.16177824139595032, 'timestamp': '2025-09-30 22:18:58.703355', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:58.760666', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.2378283590078354, 'timestamp': '2025-09-30 22:18:58.763427', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:58.821529', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.21515414118766785, 'timestamp': '2025-09-30 22:18:58.824272', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:58.881736', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.14728371798992157, 'timestamp': '2025-09-30 22:18:58.888557', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:18:58.948691', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.183613121509552, 'timestamp': '2025-09-30 22:18:58.956747', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.017835', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.23415598273277283, 'timestamp': '2025-09-30 22:18:59.021851', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.078912', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.13127319514751434, 'timestamp': '2025-09-30 22:18:59.082659', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.140921', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.06200825050473213, 'timestamp': '2025-09-30 22:18:59.147533', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.203923', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.14775502681732178, 'timestamp': '2025-09-30 22:18:59.207351', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:59.267916', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.1750878542661667, 'timestamp': '2025-09-30 22:18:59.271142', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.327980', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.15765392780303955, 'timestamp': '2025-09-30 22:18:59.333685', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.391441', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.17679783701896667, 'timestamp': '2025-09-30 22:18:59.399313', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.457291', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.1397112011909485, 'timestamp': '2025-09-30 22:18:59.461643', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.521346', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.14506393671035767, 'timestamp': '2025-09-30 22:18:59.524676', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:59.582985', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.19779573380947113, 'timestamp': '2025-09-30 22:18:59.586070', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.643151', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.13592255115509033, 'timestamp': '2025-09-30 22:18:59.649348', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:18:59.705505', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.1763998568058014, 'timestamp': '2025-09-30 22:18:59.711458', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:18:59.778471', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.23678818345069885, 'timestamp': '2025-09-30 22:18:59.781031', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:18:59.840027', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.13534609973430634, 'timestamp': '2025-09-30 22:18:59.844577', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.921697', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.11255586892366409, 'timestamp': '2025-09-30 22:18:59.931052', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:18:59.986870', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.15635325014591217, 'timestamp': '2025-09-30 22:18:59.991590', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.059382', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.10633888840675354, 'timestamp': '2025-09-30 22:19:00.062312', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:00.124430', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.23258209228515625, 'timestamp': '2025-09-30 22:19:00.127853', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.194171', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.1294270008802414, 'timestamp': '2025-09-30 22:19:00.201805', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:00.258307', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.11433945596218109, 'timestamp': '2025-09-30 22:19:00.262831', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:00.321259', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.21159771084785461, 'timestamp': '2025-09-30 22:19:00.331674', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:00.391284', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.24563463032245636, 'timestamp': '2025-09-30 22:19:00.402002', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:00.465056', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.26578864455223083, 'timestamp': '2025-09-30 22:19:00.477774', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.533574', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.08797310292720795, 'timestamp': '2025-09-30 22:19:00.542047', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.598851', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.13129796087741852, 'timestamp': '2025-09-30 22:19:00.602252', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.665676', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.1901627629995346, 'timestamp': '2025-09-30 22:19:00.673726', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.733109', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.21464531123638153, 'timestamp': '2025-09-30 22:19:00.739580', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.795310', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.12056008726358414, 'timestamp': '2025-09-30 22:19:00.798436', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:00.860341', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.11761105805635452, 'timestamp': '2025-09-30 22:19:00.863473', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:00.920268', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.16691854596138, 'timestamp': '2025-09-30 22:19:00.927161', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:00.987397', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.19801922142505646, 'timestamp': '2025-09-30 22:19:00.993831', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:01.053409', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.1573740392923355, 'timestamp': '2025-09-30 22:19:01.056174', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:01.118456', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.19784869253635406, 'timestamp': '2025-09-30 22:19:01.122100', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.192562', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.2511502206325531, 'timestamp': '2025-09-30 22:19:01.195954', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.258304', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.24508161842823029, 'timestamp': '2025-09-30 22:19:01.268642', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:01.328082', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.17040425539016724, 'timestamp': '2025-09-30 22:19:01.330633', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.389344', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.1373477578163147, 'timestamp': '2025-09-30 22:19:01.397145', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:01.470397', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.157382532954216, 'timestamp': '2025-09-30 22:19:01.474509', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:01.532785', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.166500985622406, 'timestamp': '2025-09-30 22:19:01.539656', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.597730', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.235329270362854, 'timestamp': '2025-09-30 22:19:01.602186', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:01.661365', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.2578548491001129, 'timestamp': '2025-09-30 22:19:01.664634', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.722642', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.1816178560256958, 'timestamp': '2025-09-30 22:19:01.725680', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:01.784569', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.1279158592224121, 'timestamp': '2025-09-30 22:19:01.793921', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:01.858527', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.1791326105594635, 'timestamp': '2025-09-30 22:19:01.861748', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:01.919711', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.2561286687850952, 'timestamp': '2025-09-30 22:19:01.924372', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:01.987511', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.12645196914672852, 'timestamp': '2025-09-30 22:19:01.992042', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:02.050362', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.18604464828968048, 'timestamp': '2025-09-30 22:19:02.060784', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:02.118983', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.11670379340648651, 'timestamp': '2025-09-30 22:19:02.123718', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:02.186809', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.21677221357822418, 'timestamp': '2025-09-30 22:19:02.196240', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:02.256279', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.21819603443145752, 'timestamp': '2025-09-30 22:19:02.259852', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:02.323318', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.20919601619243622, 'timestamp': '2025-09-30 22:19:02.330381', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:02.387752', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.1597907543182373, 'timestamp': '2025-09-30 22:19:02.391267', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:02.449351', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.22536619007587433, 'timestamp': '2025-09-30 22:19:02.452393', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:02.509947', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.1262834519147873, 'timestamp': '2025-09-30 22:19:02.516749', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:02.573901', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.1194106712937355, 'timestamp': '2025-09-30 22:19:02.581458', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:02.638703', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.15721963346004486, 'timestamp': '2025-09-30 22:19:02.641786', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:02.703498', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.18113082647323608, 'timestamp': '2025-09-30 22:19:02.710379', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:19:16.473641', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 12875.022831748991, 'timestamp': '2025-09-30 22:19:16.485463', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:16.546497', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.1099976897239685, 'timestamp': '2025-09-30 22:19:16.549420', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:16.607727', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.26684659719467163, 'timestamp': '2025-09-30 22:19:16.614429', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:16.671217', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.12292774021625519, 'timestamp': '2025-09-30 22:19:16.673624', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:16.730322', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.1462215781211853, 'timestamp': '2025-09-30 22:19:16.732564', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:16.789078', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.13963836431503296, 'timestamp': '2025-09-30 22:19:16.791319', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:16.847245', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.1168890967965126, 'timestamp': '2025-09-30 22:19:16.854120', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:16.909540', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.18037214875221252, 'timestamp': '2025-09-30 22:19:16.914744', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:16.971867', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.15543463826179504, 'timestamp': '2025-09-30 22:19:16.977313', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:17.036645', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.17909671366214752, 'timestamp': '2025-09-30 22:19:17.038959', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.098767', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.15863220393657684, 'timestamp': '2025-09-30 22:19:17.105779', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.161507', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.12122813612222672, 'timestamp': '2025-09-30 22:19:17.168841', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:17.227825', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.24188943207263947, 'timestamp': '2025-09-30 22:19:17.232577', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:17.295955', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.21888303756713867, 'timestamp': '2025-09-30 22:19:17.303484', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:17.366443', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.158614382147789, 'timestamp': '2025-09-30 22:19:17.372797', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.434075', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.16393613815307617, 'timestamp': '2025-09-30 22:19:17.439492', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:17.496604', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.14417539536952972, 'timestamp': '2025-09-30 22:19:17.498867', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:17.555205', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.1387462317943573, 'timestamp': '2025-09-30 22:19:17.557934', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.615133', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.14758524298667908, 'timestamp': '2025-09-30 22:19:17.622050', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.681031', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.1268661767244339, 'timestamp': '2025-09-30 22:19:17.683578', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:17.739677', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.12766166031360626, 'timestamp': '2025-09-30 22:19:17.742548', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:17.798834', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.13709469139575958, 'timestamp': '2025-09-30 22:19:17.801399', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:17.857654', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.19784358143806458, 'timestamp': '2025-09-30 22:19:17.864582', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:17.923654', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.04953743517398834, 'timestamp': '2025-09-30 22:19:17.925979', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:17.981865', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.23017875850200653, 'timestamp': '2025-09-30 22:19:17.984415', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:18.040722', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.08119551837444305, 'timestamp': '2025-09-30 22:19:18.043158', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:18.099394', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.1477050483226776, 'timestamp': '2025-09-30 22:19:18.108206', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:18.166980', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.180435448884964, 'timestamp': '2025-09-30 22:19:18.169916', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:18.226509', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.16109906136989594, 'timestamp': '2025-09-30 22:19:18.229272', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:18.286728', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.17671072483062744, 'timestamp': '2025-09-30 22:19:18.289277', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:18.363527', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.11440214514732361, 'timestamp': '2025-09-30 22:19:18.370032', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.425722', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.15309427678585052, 'timestamp': '2025-09-30 22:19:18.429069', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.486185', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.16202107071876526, 'timestamp': '2025-09-30 22:19:18.488567', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.546187', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.14859238266944885, 'timestamp': '2025-09-30 22:19:18.548877', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.605226', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.17029023170471191, 'timestamp': '2025-09-30 22:19:18.611784', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.668411', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.14578597247600555, 'timestamp': '2025-09-30 22:19:18.670877', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:18.740553', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.1923561841249466, 'timestamp': '2025-09-30 22:19:18.743086', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:18.801858', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.22687439620494843, 'timestamp': '2025-09-30 22:19:18.806270', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:18.862595', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.19887638092041016, 'timestamp': '2025-09-30 22:19:18.869528', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:18.925614', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.2036324441432953, 'timestamp': '2025-09-30 22:19:18.928358', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:18.996894', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.1045568659901619, 'timestamp': '2025-09-30 22:19:18.999657', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:19.056523', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.1930394321680069, 'timestamp': '2025-09-30 22:19:19.059535', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:19.119834', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.0992448627948761, 'timestamp': '2025-09-30 22:19:19.126775', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:19.182754', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.23951487243175507, 'timestamp': '2025-09-30 22:19:19.185836', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:19.241999', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.180569589138031, 'timestamp': '2025-09-30 22:19:19.244527', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:19.300741', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.21713824570178986, 'timestamp': '2025-09-30 22:19:19.303494', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:19.367446', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.1842903196811676, 'timestamp': '2025-09-30 22:19:19.373566', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:19.428944', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.11752403527498245, 'timestamp': '2025-09-30 22:19:19.431605', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:19.487964', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.15677964687347412, 'timestamp': '2025-09-30 22:19:19.490603', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:19.561909', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.17783160507678986, 'timestamp': '2025-09-30 22:19:19.565438', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:19.623271', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.14139018952846527, 'timestamp': '2025-09-30 22:19:19.629885', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:19:19.694833', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.16274163126945496, 'timestamp': '2025-09-30 22:19:19.697189', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:19.753750', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.14657892286777496, 'timestamp': '2025-09-30 22:19:19.756298', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:19.813453', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.1491921991109848, 'timestamp': '2025-09-30 22:19:19.828181', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:19.900743', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.18923504650592804, 'timestamp': '2025-09-30 22:19:19.928384', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:20.011436', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.14922745525836945, 'timestamp': '2025-09-30 22:19:20.037175', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:20.119308', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.17133429646492004, 'timestamp': '2025-09-30 22:19:20.140751', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:20.225076', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.15662196278572083, 'timestamp': '2025-09-30 22:19:20.244321', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:20.318705', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.18602316081523895, 'timestamp': '2025-09-30 22:19:20.349012', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:20.430230', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.22087351977825165, 'timestamp': '2025-09-30 22:19:20.444854', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:20.509070', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.17708654701709747, 'timestamp': '2025-09-30 22:19:20.520824', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:20.586111', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.15178388357162476, 'timestamp': '2025-09-30 22:19:20.603814', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:20.665572', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.23681794106960297, 'timestamp': '2025-09-30 22:19:20.690411', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:20.760287', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.19082367420196533, 'timestamp': '2025-09-30 22:19:20.783328', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:20.853985', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.1663958877325058, 'timestamp': '2025-09-30 22:19:20.859151', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:20.922655', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.1341480165719986, 'timestamp': '2025-09-30 22:19:20.934277', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:21.015764', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.19549775123596191, 'timestamp': '2025-09-30 22:19:21.044844', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:21.110606', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.14056967198848724, 'timestamp': '2025-09-30 22:19:21.119229', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:21.186529', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.18021009862422943, 'timestamp': '2025-09-30 22:19:21.193912', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:21.262906', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.12870490550994873, 'timestamp': '2025-09-30 22:19:21.275462', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:21.342603', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.13129840791225433, 'timestamp': '2025-09-30 22:19:21.357667', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:21.426854', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.2551821768283844, 'timestamp': '2025-09-30 22:19:21.443631', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:21.510152', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.22342640161514282, 'timestamp': '2025-09-30 22:19:21.535923', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:21.596856', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.15795861184597015, 'timestamp': '2025-09-30 22:19:21.631171', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:21.700844', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.19881746172904968, 'timestamp': '2025-09-30 22:19:21.713965', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:21.779972', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.25957193970680237, 'timestamp': '2025-09-30 22:19:21.782473', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:21.838510', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.20108628273010254, 'timestamp': '2025-09-30 22:19:21.841051', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:21.899923', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.12662115693092346, 'timestamp': '2025-09-30 22:19:21.902321', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:21.967278', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.10837328433990479, 'timestamp': '2025-09-30 22:19:21.974189', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.030155', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.11807752400636673, 'timestamp': '2025-09-30 22:19:22.035900', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:22.095609', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.1601056009531021, 'timestamp': '2025-09-30 22:19:22.099444', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.157428', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.13758987188339233, 'timestamp': '2025-09-30 22:19:22.160964', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:22.216845', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.19920729100704193, 'timestamp': '2025-09-30 22:19:22.222991', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.281434', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.2293444126844406, 'timestamp': '2025-09-30 22:19:22.283699', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.339889', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.18320892751216888, 'timestamp': '2025-09-30 22:19:22.343199', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.399102', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.1356612592935562, 'timestamp': '2025-09-30 22:19:22.402728', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.474391', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.175458624958992, 'timestamp': '2025-09-30 22:19:22.481307', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.536910', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.16503013670444489, 'timestamp': '2025-09-30 22:19:22.540598', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.597743', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.21750116348266602, 'timestamp': '2025-09-30 22:19:22.600232', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.657199', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.14605754613876343, 'timestamp': '2025-09-30 22:19:22.659824', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.718850', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.16050447523593903, 'timestamp': '2025-09-30 22:19:22.726045', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.782547', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.1732107400894165, 'timestamp': '2025-09-30 22:19:22.785499', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.848958', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.1761307269334793, 'timestamp': '2025-09-30 22:19:22.851295', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:22.907792', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.13511425256729126, 'timestamp': '2025-09-30 22:19:22.910989', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:22.966759', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.18448702991008759, 'timestamp': '2025-09-30 22:19:22.973910', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:23.033552', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.23949509859085083, 'timestamp': '2025-09-30 22:19:23.037361', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:23.093746', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.2728419899940491, 'timestamp': '2025-09-30 22:19:23.096513', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.153470', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.09652497619390488, 'timestamp': '2025-09-30 22:19:23.156870', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.213974', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.16878904402256012, 'timestamp': '2025-09-30 22:19:23.220298', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:23.275608', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.14179958403110504, 'timestamp': '2025-09-30 22:19:23.278103', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.337335', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.1135239452123642, 'timestamp': '2025-09-30 22:19:23.340910', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:23.403557', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.18383754789829254, 'timestamp': '2025-09-30 22:19:23.407096', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:23.465476', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.11019578576087952, 'timestamp': '2025-09-30 22:19:23.472060', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.530077', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.19771455228328705, 'timestamp': '2025-09-30 22:19:23.535028', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.592453', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.176885724067688, 'timestamp': '2025-09-30 22:19:23.602204', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:23.671577', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.09871994704008102, 'timestamp': '2025-09-30 22:19:23.679236', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:23.740879', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.2164425402879715, 'timestamp': '2025-09-30 22:19:23.746966', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.811878', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.15121284127235413, 'timestamp': '2025-09-30 22:19:23.817494', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:23.876667', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.2671723961830139, 'timestamp': '2025-09-30 22:19:23.882808', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:23.944444', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.062365736812353134, 'timestamp': '2025-09-30 22:19:23.954584', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:24.019415', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.1294657289981842, 'timestamp': '2025-09-30 22:19:24.028441', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.102966', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11661788821220398, 'timestamp': '2025-09-30 22:19:24.105633', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:24.163439', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.22462663054466248, 'timestamp': '2025-09-30 22:19:24.166440', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:24.224290', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.12602205574512482, 'timestamp': '2025-09-30 22:19:24.228741', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.287056', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.2043026089668274, 'timestamp': '2025-09-30 22:19:24.292989', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.350005', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.1479358822107315, 'timestamp': '2025-09-30 22:19:24.352415', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:24.409564', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.14306434988975525, 'timestamp': '2025-09-30 22:19:24.412839', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:24.480474', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.1564653068780899, 'timestamp': '2025-09-30 22:19:24.483086', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:24.544151', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.15034911036491394, 'timestamp': '2025-09-30 22:19:24.551009', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:24.606843', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.08000446110963821, 'timestamp': '2025-09-30 22:19:24.609388', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.665804', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.14334706962108612, 'timestamp': '2025-09-30 22:19:24.668251', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:24.728404', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.17163272202014923, 'timestamp': '2025-09-30 22:19:24.731093', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:24.792839', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.13208068907260895, 'timestamp': '2025-09-30 22:19:24.798711', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.855337', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.15089920163154602, 'timestamp': '2025-09-30 22:19:24.858507', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:24.927022', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.18152450025081635, 'timestamp': '2025-09-30 22:19:24.929640', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:24.986570', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.09997103363275528, 'timestamp': '2025-09-30 22:19:24.989150', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:25.047732', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.14949090778827667, 'timestamp': '2025-09-30 22:19:25.053940', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:25.109831', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.26182064414024353, 'timestamp': '2025-09-30 22:19:25.112210', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:25.168002', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.168676495552063, 'timestamp': '2025-09-30 22:19:25.171424', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:25.228830', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.16212381422519684, 'timestamp': '2025-09-30 22:19:25.231424', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:25.287849', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.16256971657276154, 'timestamp': '2025-09-30 22:19:25.294211', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:25.349835', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.2174985706806183, 'timestamp': '2025-09-30 22:19:25.352176', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:25.408150', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.11305699497461319, 'timestamp': '2025-09-30 22:19:25.426103', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:25.496600', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.06337634474039078, 'timestamp': '2025-09-30 22:19:25.502383', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:25.573040', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.1409091204404831, 'timestamp': '2025-09-30 22:19:25.581493', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:25.639318', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.11364924907684326, 'timestamp': '2025-09-30 22:19:25.654967', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:25.711711', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.09679677337408066, 'timestamp': '2025-09-30 22:19:25.714268', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:25.770891', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.07660730928182602, 'timestamp': '2025-09-30 22:19:25.773967', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:25.836290', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.14889687299728394, 'timestamp': '2025-09-30 22:19:25.842616', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:25.898612', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.2736683785915375, 'timestamp': '2025-09-30 22:19:25.901287', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:25.958873', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.20674385130405426, 'timestamp': '2025-09-30 22:19:25.961270', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:26.018094', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.14290906488895416, 'timestamp': '2025-09-30 22:19:26.021030', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.078966', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.10764575004577637, 'timestamp': '2025-09-30 22:19:26.085626', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:26.141374', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.18742020428180695, 'timestamp': '2025-09-30 22:19:26.143759', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.201018', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.1388120949268341, 'timestamp': '2025-09-30 22:19:26.206567', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.264296', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.20101334154605865, 'timestamp': '2025-09-30 22:19:26.266969', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.334966', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.10009649395942688, 'timestamp': '2025-09-30 22:19:26.341054', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:26.396901', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.19192832708358765, 'timestamp': '2025-09-30 22:19:26.399592', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:26.456894', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.2536690831184387, 'timestamp': '2025-09-30 22:19:26.459898', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:26.517180', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.2705935835838318, 'timestamp': '2025-09-30 22:19:26.519605', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:26.576015', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.15188860893249512, 'timestamp': '2025-09-30 22:19:26.582794', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:26.638708', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.12636208534240723, 'timestamp': '2025-09-30 22:19:26.641544', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.712329', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.14399592578411102, 'timestamp': '2025-09-30 22:19:26.715619', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:26.775781', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.3006613254547119, 'timestamp': '2025-09-30 22:19:26.779567', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.841524', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.12860310077667236, 'timestamp': '2025-09-30 22:19:26.849712', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:26.916150', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.18677373230457306, 'timestamp': '2025-09-30 22:19:26.919991', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:26.977174', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.19162890315055847, 'timestamp': '2025-09-30 22:19:26.980054', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:27.041333', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.13361848890781403, 'timestamp': '2025-09-30 22:19:27.043584', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:27.100895', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.12183278053998947, 'timestamp': '2025-09-30 22:19:27.107914', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:27.164459', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.19347643852233887, 'timestamp': '2025-09-30 22:19:27.167271', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:27.236901', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.24895299971103668, 'timestamp': '2025-09-30 22:19:27.239974', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:27.296820', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.1434011310338974, 'timestamp': '2025-09-30 22:19:27.299453', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:27.355266', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.10260248184204102, 'timestamp': '2025-09-30 22:19:27.362288', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:27.417896', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.2923668622970581, 'timestamp': '2025-09-30 22:19:27.420420', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:27.476393', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.12454866617918015, 'timestamp': '2025-09-30 22:19:27.479064', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:27.534818', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.2688524127006531, 'timestamp': '2025-09-30 22:19:27.537452', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:27.593234', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.19682738184928894, 'timestamp': '2025-09-30 22:19:27.599725', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:27.663236', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.23106779158115387, 'timestamp': '2025-09-30 22:19:27.665771', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:27.722327', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.16717927157878876, 'timestamp': '2025-09-30 22:19:27.725320', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:27.784081', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.1064961776137352, 'timestamp': '2025-09-30 22:19:27.786642', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:27.842865', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.13148027658462524, 'timestamp': '2025-09-30 22:19:27.849492', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:27.905097', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.1779850274324417, 'timestamp': '2025-09-30 22:19:27.907633', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:27.963911', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.16802330315113068, 'timestamp': '2025-09-30 22:19:27.966211', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:28.022523', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.17001193761825562, 'timestamp': '2025-09-30 22:19:28.025413', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:28.081190', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.160075381398201, 'timestamp': '2025-09-30 22:19:28.087281', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.142888', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.12327205389738083, 'timestamp': '2025-09-30 22:19:28.145764', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:28.201910', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.18265406787395477, 'timestamp': '2025-09-30 22:19:28.204538', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.260977', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.2326078563928604, 'timestamp': '2025-09-30 22:19:28.263533', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.320036', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.10380441695451736, 'timestamp': '2025-09-30 22:19:28.326170', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.382117', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.1155446469783783, 'timestamp': '2025-09-30 22:19:28.387736', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:28.450087', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.1924418956041336, 'timestamp': '2025-09-30 22:19:28.452416', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.508665', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.13090308010578156, 'timestamp': '2025-09-30 22:19:28.511347', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:28.569513', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.14892426133155823, 'timestamp': '2025-09-30 22:19:28.576229', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.632885', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.1617140769958496, 'timestamp': '2025-09-30 22:19:28.636063', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:28.705410', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.16142772138118744, 'timestamp': '2025-09-30 22:19:28.707962', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.764233', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.09157512336969376, 'timestamp': '2025-09-30 22:19:28.766761', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:28.822739', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.1501225233078003, 'timestamp': '2025-09-30 22:19:28.829648', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:28.885280', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.22363901138305664, 'timestamp': '2025-09-30 22:19:28.888759', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:28.953592', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.26433858275413513, 'timestamp': '2025-09-30 22:19:28.956280', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.012649', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.10911981016397476, 'timestamp': '2025-09-30 22:19:29.015192', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.074681', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.20815293490886688, 'timestamp': '2025-09-30 22:19:29.081462', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:29.136743', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.09719832986593246, 'timestamp': '2025-09-30 22:19:29.139094', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.195331', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.15040746331214905, 'timestamp': '2025-09-30 22:19:29.197763', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.254216', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.15772701799869537, 'timestamp': '2025-09-30 22:19:29.256759', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.324909', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.16594640910625458, 'timestamp': '2025-09-30 22:19:29.330852', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.386655', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.1250656098127365, 'timestamp': '2025-09-30 22:19:29.389296', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:29.445645', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.09988454729318619, 'timestamp': '2025-09-30 22:19:29.448193', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.504747', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.18532592058181763, 'timestamp': '2025-09-30 22:19:29.507393', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.563367', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.24812670052051544, 'timestamp': '2025-09-30 22:19:29.569236', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:29.625741', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.1644439399242401, 'timestamp': '2025-09-30 22:19:29.633980', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.690037', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.12163348495960236, 'timestamp': '2025-09-30 22:19:29.692753', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.749769', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.13639914989471436, 'timestamp': '2025-09-30 22:19:29.752631', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.811226', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.12622489035129547, 'timestamp': '2025-09-30 22:19:29.818064', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:29.874350', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.23608124256134033, 'timestamp': '2025-09-30 22:19:29.876832', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:19:29.933143', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.15000541508197784, 'timestamp': '2025-09-30 22:19:29.935861', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:29.992973', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.19057004153728485, 'timestamp': '2025-09-30 22:19:29.995863', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:30.060230', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.179440438747406, 'timestamp': '2025-09-30 22:19:30.066113', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:30.124012', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.17306384444236755, 'timestamp': '2025-09-30 22:19:30.126528', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:30.188656', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.10540402680635452, 'timestamp': '2025-09-30 22:19:30.191124', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:30.247447', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.23698608577251434, 'timestamp': '2025-09-30 22:19:30.250555', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:30.307741', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.17679914832115173, 'timestamp': '2025-09-30 22:19:30.316519', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:30.376596', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.1754637509584427, 'timestamp': '2025-09-30 22:19:30.379124', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:30.435676', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.22545787692070007, 'timestamp': '2025-09-30 22:19:30.439228', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:30.496233', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.17041563987731934, 'timestamp': '2025-09-30 22:19:30.498856', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:30.562183', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15063753724098206, 'timestamp': '2025-09-30 22:19:30.569350', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:30.628521', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.07396537810564041, 'timestamp': '2025-09-30 22:19:30.635513', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:30.694198', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.05830296128988266, 'timestamp': '2025-09-30 22:19:30.699298', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:30.761589', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.1840197741985321, 'timestamp': '2025-09-30 22:19:30.764269', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:30.820723', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.1760907769203186, 'timestamp': '2025-09-30 22:19:30.827371', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:30.888188', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.2078980952501297, 'timestamp': '2025-09-30 22:19:30.890815', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:30.947871', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.242655947804451, 'timestamp': '2025-09-30 22:19:30.950212', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:31.006206', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.2070915400981903, 'timestamp': '2025-09-30 22:19:31.010363', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.090291', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.22912609577178955, 'timestamp': '2025-09-30 22:19:31.096502', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:31.162854', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.16433395445346832, 'timestamp': '2025-09-30 22:19:31.165275', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:31.222393', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.22156859934329987, 'timestamp': '2025-09-30 22:19:31.225558', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:31.290514', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.21195784211158752, 'timestamp': '2025-09-30 22:19:31.293107', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.350533', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.20836405456066132, 'timestamp': '2025-09-30 22:19:31.356553', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.413524', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.12883879244327545, 'timestamp': '2025-09-30 22:19:31.416869', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:31.483549', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.1345757395029068, 'timestamp': '2025-09-30 22:19:31.486505', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:31.545038', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.2112922966480255, 'timestamp': '2025-09-30 22:19:31.555189', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:31.611850', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.19228361546993256, 'timestamp': '2025-09-30 22:19:31.618749', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.673725', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.1527947634458542, 'timestamp': '2025-09-30 22:19:31.677052', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:31.734914', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.20975333452224731, 'timestamp': '2025-09-30 22:19:31.740200', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.800238', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.11538232862949371, 'timestamp': '2025-09-30 22:19:31.803016', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:31.860005', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.16773097217082977, 'timestamp': '2025-09-30 22:19:31.866629', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.922115', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.11667975783348083, 'timestamp': '2025-09-30 22:19:31.924913', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:31.981508', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.14331917464733124, 'timestamp': '2025-09-30 22:19:31.987769', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.046001', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.1503213495016098, 'timestamp': '2025-09-30 22:19:32.049885', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.107158', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.11990148574113846, 'timestamp': '2025-09-30 22:19:32.113347', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:32.170362', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.12522420287132263, 'timestamp': '2025-09-30 22:19:32.172970', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:32.237969', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.140876904129982, 'timestamp': '2025-09-30 22:19:32.240466', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.297634', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.12309493124485016, 'timestamp': '2025-09-30 22:19:32.300178', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.360451', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.1284766048192978, 'timestamp': '2025-09-30 22:19:32.366943', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:32.437401', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.18555030226707458, 'timestamp': '2025-09-30 22:19:32.442788', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:32.501088', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.12008507549762726, 'timestamp': '2025-09-30 22:19:32.506128', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:32.583900', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.15442496538162231, 'timestamp': '2025-09-30 22:19:32.589878', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.650636', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.18185946345329285, 'timestamp': '2025-09-30 22:19:32.660417', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:32.719856', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.1566668301820755, 'timestamp': '2025-09-30 22:19:32.724915', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.784635', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.13690565526485443, 'timestamp': '2025-09-30 22:19:32.786796', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:32.843832', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.08485863357782364, 'timestamp': '2025-09-30 22:19:32.847513', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.903937', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.1353559046983719, 'timestamp': '2025-09-30 22:19:32.910068', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:32.965974', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.13426473736763, 'timestamp': '2025-09-30 22:19:32.969397', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.027021', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.15064500272274017, 'timestamp': '2025-09-30 22:19:33.029984', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.098028', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.2263418436050415, 'timestamp': '2025-09-30 22:19:33.100481', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.156679', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.19013068079948425, 'timestamp': '2025-09-30 22:19:33.162634', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:33.218496', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.1163654625415802, 'timestamp': '2025-09-30 22:19:33.220982', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:33.287033', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.110829658806324, 'timestamp': '2025-09-30 22:19:33.289636', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.346235', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.09479601681232452, 'timestamp': '2025-09-30 22:19:33.349018', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:33.405091', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.16281643509864807, 'timestamp': '2025-09-30 22:19:33.411694', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:33.466995', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.12979574501514435, 'timestamp': '2025-09-30 22:19:33.470695', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:33.535128', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.11522319912910461, 'timestamp': '2025-09-30 22:19:33.537631', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.594360', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.14425399899482727, 'timestamp': '2025-09-30 22:19:33.598904', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:33.662634', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.1698731780052185, 'timestamp': '2025-09-30 22:19:33.669442', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:33.727010', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.1303081065416336, 'timestamp': '2025-09-30 22:19:33.729351', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.792306', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.20786648988723755, 'timestamp': '2025-09-30 22:19:33.795507', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:33.852860', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.19646720588207245, 'timestamp': '2025-09-30 22:19:33.855760', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:33.913855', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.23408707976341248, 'timestamp': '2025-09-30 22:19:33.924657', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:34.011003', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.0866212397813797, 'timestamp': '2025-09-30 22:19:34.015216', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:34.072618', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.13674570620059967, 'timestamp': '2025-09-30 22:19:34.075229', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.133044', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.23320960998535156, 'timestamp': '2025-09-30 22:19:34.139023', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:34.196436', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.2233680635690689, 'timestamp': '2025-09-30 22:19:34.203686', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.261145', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.12055530399084091, 'timestamp': '2025-09-30 22:19:34.263936', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.325602', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.20132917165756226, 'timestamp': '2025-09-30 22:19:34.329935', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:34.387626', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.2191626876592636, 'timestamp': '2025-09-30 22:19:34.393171', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:34.449912', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.16796639561653137, 'timestamp': '2025-09-30 22:19:34.456363', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.513273', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.12241395562887192, 'timestamp': '2025-09-30 22:19:34.517782', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:34.574320', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.1442829966545105, 'timestamp': '2025-09-30 22:19:34.579853', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:34.637871', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.15128380060195923, 'timestamp': '2025-09-30 22:19:34.641457', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:34.699482', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.1271435171365738, 'timestamp': '2025-09-30 22:19:34.706658', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.765043', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.12614496052265167, 'timestamp': '2025-09-30 22:19:34.767739', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.825321', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.1875106692314148, 'timestamp': '2025-09-30 22:19:34.827902', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:34.887569', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.1640857756137848, 'timestamp': '2025-09-30 22:19:34.890239', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:34.949700', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.12553660571575165, 'timestamp': '2025-09-30 22:19:34.957880', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:35.017253', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.09346018731594086, 'timestamp': '2025-09-30 22:19:35.021914', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:35.078874', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.11110217124223709, 'timestamp': '2025-09-30 22:19:35.081525', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.140800', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.14891904592514038, 'timestamp': '2025-09-30 22:19:35.145758', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:35.202671', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.23529022932052612, 'timestamp': '2025-09-30 22:19:35.209180', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.267396', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.13767297565937042, 'timestamp': '2025-09-30 22:19:35.272334', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:35.329240', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.134921133518219, 'timestamp': '2025-09-30 22:19:35.332189', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.392667', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.21322661638259888, 'timestamp': '2025-09-30 22:19:35.395530', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.452344', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.2521969676017761, 'timestamp': '2025-09-30 22:19:35.461281', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:35.520943', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.17399728298187256, 'timestamp': '2025-09-30 22:19:35.523746', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:35.580090', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.2123226523399353, 'timestamp': '2025-09-30 22:19:35.583245', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.641759', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.12874989211559296, 'timestamp': '2025-09-30 22:19:35.647322', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:35.704382', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.12149550765752792, 'timestamp': '2025-09-30 22:19:35.714207', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.769852', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.12308130413293839, 'timestamp': '2025-09-30 22:19:35.774431', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.831049', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.22339026629924774, 'timestamp': '2025-09-30 22:19:35.835106', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:35.892662', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.14996026456356049, 'timestamp': '2025-09-30 22:19:35.897343', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:35.955532', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.1928885132074356, 'timestamp': '2025-09-30 22:19:35.962077', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:36.020903', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.15750569105148315, 'timestamp': '2025-09-30 22:19:36.027929', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:36.087337', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10428477823734283, 'timestamp': '2025-09-30 22:19:36.093000', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:36.152242', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.09897749125957489, 'timestamp': '2025-09-30 22:19:36.155119', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:36.211465', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.24510464072227478, 'timestamp': '2025-09-30 22:19:36.217711', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:36.275183', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.12609514594078064, 'timestamp': '2025-09-30 22:19:36.278238', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:36.335278', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.12035064399242401, 'timestamp': '2025-09-30 22:19:36.338514', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:36.394366', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.1717541366815567, 'timestamp': '2025-09-30 22:19:36.396797', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.452461', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.11789561808109283, 'timestamp': '2025-09-30 22:19:36.460432', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:36.515976', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.13753196597099304, 'timestamp': '2025-09-30 22:19:36.519081', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:36.578896', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.19107109308242798, 'timestamp': '2025-09-30 22:19:36.581565', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.638110', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.27052322030067444, 'timestamp': '2025-09-30 22:19:36.641397', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.698296', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.14940977096557617, 'timestamp': '2025-09-30 22:19:36.706115', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.765552', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.16258926689624786, 'timestamp': '2025-09-30 22:19:36.769858', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:36.834084', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.15235251188278198, 'timestamp': '2025-09-30 22:19:36.838654', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.894977', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.14387930929660797, 'timestamp': '2025-09-30 22:19:36.897758', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:36.963437', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.12933389842510223, 'timestamp': '2025-09-30 22:19:36.977104', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.037467', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.219633087515831, 'timestamp': '2025-09-30 22:19:37.040477', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.096219', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.18346983194351196, 'timestamp': '2025-09-30 22:19:37.098925', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:37.155440', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.1571679711341858, 'timestamp': '2025-09-30 22:19:37.158484', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:37.218126', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.13032858073711395, 'timestamp': '2025-09-30 22:19:37.224767', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:37.281231', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.1485285460948944, 'timestamp': '2025-09-30 22:19:37.284365', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.341351', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.12909652292728424, 'timestamp': '2025-09-30 22:19:37.343816', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.400006', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.16559727489948273, 'timestamp': '2025-09-30 22:19:37.402323', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:37.468195', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.15453730523586273, 'timestamp': '2025-09-30 22:19:37.475243', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.531874', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.18876925110816956, 'timestamp': '2025-09-30 22:19:37.534384', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:37.590553', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.14181026816368103, 'timestamp': '2025-09-30 22:19:37.593750', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:37.650279', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.2708759307861328, 'timestamp': '2025-09-30 22:19:37.658064', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:37.717556', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.13962867856025696, 'timestamp': '2025-09-30 22:19:37.724937', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:37.780860', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.16728392243385315, 'timestamp': '2025-09-30 22:19:37.783553', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.840009', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.2704724073410034, 'timestamp': '2025-09-30 22:19:37.842551', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.899876', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.13873514533042908, 'timestamp': '2025-09-30 22:19:37.902593', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:37.960884', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.09504351019859314, 'timestamp': '2025-09-30 22:19:37.970893', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:38.043639', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.11523875594139099, 'timestamp': '2025-09-30 22:19:38.047375', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:38.105910', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.23920997977256775, 'timestamp': '2025-09-30 22:19:38.110030', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:38.167618', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.14149288833141327, 'timestamp': '2025-09-30 22:19:38.171821', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.229765', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.18951991200447083, 'timestamp': '2025-09-30 22:19:38.236743', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:38.298241', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.1318759322166443, 'timestamp': '2025-09-30 22:19:38.302309', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:38.361127', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.14126932621002197, 'timestamp': '2025-09-30 22:19:38.365575', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.424337', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.16565540432929993, 'timestamp': '2025-09-30 22:19:38.430870', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:38.489660', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.0936383530497551, 'timestamp': '2025-09-30 22:19:38.496430', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.560465', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.08718759566545486, 'timestamp': '2025-09-30 22:19:38.566899', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.629139', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.1722622811794281, 'timestamp': '2025-09-30 22:19:38.631999', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:38.688895', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.17286404967308044, 'timestamp': '2025-09-30 22:19:38.693137', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.750993', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.21795545518398285, 'timestamp': '2025-09-30 22:19:38.757975', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:38.816893', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.1214924156665802, 'timestamp': '2025-09-30 22:19:38.819502', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:38.876534', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.2656015157699585, 'timestamp': '2025-09-30 22:19:38.882417', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:38.943768', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.14159978926181793, 'timestamp': '2025-09-30 22:19:38.948469', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:39.005010', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.16191059350967407, 'timestamp': '2025-09-30 22:19:39.019164', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.080922', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.13509593904018402, 'timestamp': '2025-09-30 22:19:39.084158', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:39.151295', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.16886122524738312, 'timestamp': '2025-09-30 22:19:39.154104', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:39.210676', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.10028120130300522, 'timestamp': '2025-09-30 22:19:39.216639', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.288994', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.10213425010442734, 'timestamp': '2025-09-30 22:19:39.295485', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.351419', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.24067382514476776, 'timestamp': '2025-09-30 22:19:39.354328', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.410694', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.24434404075145721, 'timestamp': '2025-09-30 22:19:39.413870', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:39.473336', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.161018967628479, 'timestamp': '2025-09-30 22:19:39.480802', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:39.537022', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.10599357634782791, 'timestamp': '2025-09-30 22:19:39.546073', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:39.609564', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.2147425413131714, 'timestamp': '2025-09-30 22:19:39.612417', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:39.673173', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.10823732614517212, 'timestamp': '2025-09-30 22:19:39.676053', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:39.734283', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.14770372211933136, 'timestamp': '2025-09-30 22:19:39.737343', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.796975', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.19403034448623657, 'timestamp': '2025-09-30 22:19:39.803822', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:39.861123', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.10935306549072266, 'timestamp': '2025-09-30 22:19:39.864546', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.927850', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.23043780028820038, 'timestamp': '2025-09-30 22:19:39.931542', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:39.989287', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.15312069654464722, 'timestamp': '2025-09-30 22:19:40.001538', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.063242', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.16419775784015656, 'timestamp': '2025-09-30 22:19:40.070226', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.127509', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.08981683105230331, 'timestamp': '2025-09-30 22:19:40.130435', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:40.191666', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.14210142195224762, 'timestamp': '2025-09-30 22:19:40.194144', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:40.262324', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.09455128759145737, 'timestamp': '2025-09-30 22:19:40.266381', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.327240', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.10396112501621246, 'timestamp': '2025-09-30 22:19:40.333424', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.391245', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.19647085666656494, 'timestamp': '2025-09-30 22:19:40.394256', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:40.450405', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.1348085105419159, 'timestamp': '2025-09-30 22:19:40.454339', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:40.516888', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.14447177946567535, 'timestamp': '2025-09-30 22:19:40.520329', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.580464', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.20566709339618683, 'timestamp': '2025-09-30 22:19:40.588079', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.651813', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.12890776991844177, 'timestamp': '2025-09-30 22:19:40.655028', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:40.711761', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.13123957812786102, 'timestamp': '2025-09-30 22:19:40.715439', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:40.784276', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.16489027440547943, 'timestamp': '2025-09-30 22:19:40.788163', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:40.847018', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.18464893102645874, 'timestamp': '2025-09-30 22:19:40.853802', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-30 22:19:41.238237', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:41.298844', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.18528954684734344, 'timestamp': '2025-09-30 22:19:41.303276', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:41.363401', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.2583766579627991, 'timestamp': '2025-09-30 22:19:41.366780', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:41.422868', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.03417947515845299, 'timestamp': '2025-09-30 22:19:41.426812', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:41.484906', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.12510009109973907, 'timestamp': '2025-09-30 22:19:41.491642', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:41.547735', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.12137920409440994, 'timestamp': '2025-09-30 22:19:41.554957', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:41.611512', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.202457994222641, 'timestamp': '2025-09-30 22:19:41.614228', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:41.672567', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.1968958079814911, 'timestamp': '2025-09-30 22:19:41.675899', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:41.732517', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.1633862406015396, 'timestamp': '2025-09-30 22:19:41.740005', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:41.796219', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.15342558920383453, 'timestamp': '2025-09-30 22:19:41.799599', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:41.862413', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.18486911058425903, 'timestamp': '2025-09-30 22:19:41.865621', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:41.922626', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.18129956722259521, 'timestamp': '2025-09-30 22:19:41.925898', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:41.981995', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.1267140507698059, 'timestamp': '2025-09-30 22:19:41.988759', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:42.045433', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.2227361649274826, 'timestamp': '2025-09-30 22:19:42.048634', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:42.112320', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.144570991396904, 'timestamp': '2025-09-30 22:19:42.115475', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:42.171831', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.13945110142230988, 'timestamp': '2025-09-30 22:19:42.177937', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:42.236122', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.09252849221229553, 'timestamp': '2025-09-30 22:19:42.244741', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:42.302412', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.1513633131980896, 'timestamp': '2025-09-30 22:19:42.307029', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:42.364930', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.17675843834877014, 'timestamp': '2025-09-30 22:19:42.369437', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:42.426734', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.11450818926095963, 'timestamp': '2025-09-30 22:19:42.429858', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:42.486981', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.11536888033151627, 'timestamp': '2025-09-30 22:19:42.496871', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:42.552891', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.20653685927391052, 'timestamp': '2025-09-30 22:19:42.563141', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:42.619405', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.09920825809240341, 'timestamp': '2025-09-30 22:19:42.623152', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:42.682525', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.1609584093093872, 'timestamp': '2025-09-30 22:19:42.685867', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:42.742899', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.16409258544445038, 'timestamp': '2025-09-30 22:19:42.750792', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:42.807140', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.0993466004729271, 'timestamp': '2025-09-30 22:19:42.811237', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:19:42.868255', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.21750259399414062, 'timestamp': '2025-09-30 22:19:42.871238', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:42.932373', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.16885648667812347, 'timestamp': '2025-09-30 22:19:42.935620', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:42.992999', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.16707810759544373, 'timestamp': '2025-09-30 22:19:42.999018', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.063762', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.0877135768532753, 'timestamp': '2025-09-30 22:19:43.067419', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:43.131853', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.15252162516117096, 'timestamp': '2025-09-30 22:19:43.139143', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.202461', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.14766770601272583, 'timestamp': '2025-09-30 22:19:43.207623', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.264993', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.21161355078220367, 'timestamp': '2025-09-30 22:19:43.279054', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:43.343904', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.16697874665260315, 'timestamp': '2025-09-30 22:19:43.349978', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:43.408013', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.2710328698158264, 'timestamp': '2025-09-30 22:19:43.427633', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:19:43.486573', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.19339841604232788, 'timestamp': '2025-09-30 22:19:43.493388', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.553193', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.16284573078155518, 'timestamp': '2025-09-30 22:19:43.561091', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.624556', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.14289546012878418, 'timestamp': '2025-09-30 22:19:43.629158', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:43.691022', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.18489299714565277, 'timestamp': '2025-09-30 22:19:43.697781', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:43.757013', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.19780901074409485, 'timestamp': '2025-09-30 22:19:43.764684', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:43.822175', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.17991812527179718, 'timestamp': '2025-09-30 22:19:43.830906', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:43.904845', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.12365186959505081, 'timestamp': '2025-09-30 22:19:43.910243', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:43.978351', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.14028063416481018, 'timestamp': '2025-09-30 22:19:43.990202', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:44.049021', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.14515559375286102, 'timestamp': '2025-09-30 22:19:44.054736', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.117040', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.165866881608963, 'timestamp': '2025-09-30 22:19:44.127057', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:44.186460', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.11545486003160477, 'timestamp': '2025-09-30 22:19:44.192753', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.254972', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.19425100088119507, 'timestamp': '2025-09-30 22:19:44.263451', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.324271', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.15080654621124268, 'timestamp': '2025-09-30 22:19:44.330009', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:44.392278', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.17512914538383484, 'timestamp': '2025-09-30 22:19:44.401648', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:44.459588', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.1657015085220337, 'timestamp': '2025-09-30 22:19:44.464458', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.522931', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.15181396901607513, 'timestamp': '2025-09-30 22:19:44.529098', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.588890', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.27602291107177734, 'timestamp': '2025-09-30 22:19:44.596536', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:44.655466', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.22321371734142303, 'timestamp': '2025-09-30 22:19:44.662807', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:44.731297', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.28620827198028564, 'timestamp': '2025-09-30 22:19:44.740541', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:44.806402', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.17918840050697327, 'timestamp': '2025-09-30 22:19:44.816643', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:44.875053', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.1348712146282196, 'timestamp': '2025-09-30 22:19:44.887951', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:44.946728', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.1473507285118103, 'timestamp': '2025-09-30 22:19:44.953587', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:45.020736', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.23950506746768951, 'timestamp': '2025-09-30 22:19:45.023974', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:45.081990', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.17786996066570282, 'timestamp': '2025-09-30 22:19:45.094288', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:45.154470', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.13646888732910156, 'timestamp': '2025-09-30 22:19:45.167200', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:45.233918', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.09155981987714767, 'timestamp': '2025-09-30 22:19:45.251206', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:45.307022', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.12978298962116241, 'timestamp': '2025-09-30 22:19:45.310200', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:45.376249', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.10922536253929138, 'timestamp': '2025-09-30 22:19:45.382716', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:45.451732', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.2377896010875702, 'timestamp': '2025-09-30 22:19:45.465171', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:19:45.535038', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.13403715193271637, 'timestamp': '2025-09-30 22:19:45.556430', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:45.613359', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.1441335678100586, 'timestamp': '2025-09-30 22:19:45.626365', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:45.683188', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.2524621784687042, 'timestamp': '2025-09-30 22:19:45.687952', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:45.744737', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.1641400158405304, 'timestamp': '2025-09-30 22:19:45.748744', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:45.807166', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.1834370642900467, 'timestamp': '2025-09-30 22:19:45.816149', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:45.873913', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.24937470257282257, 'timestamp': '2025-09-30 22:19:45.886460', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:45.942936', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.15420033037662506, 'timestamp': '2025-09-30 22:19:45.947962', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:46.016052', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.19858740270137787, 'timestamp': '2025-09-30 22:19:46.034365', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:19:46.103844', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.12073398381471634, 'timestamp': '2025-09-30 22:19:46.110544', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:46.167609', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.09882383048534393, 'timestamp': '2025-09-30 22:19:46.173761', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:46.244795', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.1927679479122162, 'timestamp': '2025-09-30 22:19:46.249980', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:19:46.308195', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.1147553026676178, 'timestamp': '2025-09-30 22:19:46.310986', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:46.369919', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.1402810961008072, 'timestamp': '2025-09-30 22:19:46.385439', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:46.442114', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.16116680204868317, 'timestamp': '2025-09-30 22:19:46.454978', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:46.520013', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.13975860178470612, 'timestamp': '2025-09-30 22:19:46.523761', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:46.580107', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.11702976375818253, 'timestamp': '2025-09-30 22:19:46.586746', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:46.644238', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.18559496104717255, 'timestamp': '2025-09-30 22:19:46.651248', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:46.710563', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.1188061386346817, 'timestamp': '2025-09-30 22:19:46.714391', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:46.771927', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.1130356714129448, 'timestamp': '2025-09-30 22:19:46.775004', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:46.833863', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.11461742222309113, 'timestamp': '2025-09-30 22:19:46.837408', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:46.922339', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.16482418775558472, 'timestamp': '2025-09-30 22:19:46.928633', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:19:47.004244', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.2601234018802643, 'timestamp': '2025-09-30 22:19:47.011653', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:47.068858', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.08505717664957047, 'timestamp': '2025-09-30 22:19:47.076008', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:19:47.136760', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.0938446968793869, 'timestamp': '2025-09-30 22:19:47.140792', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:47.198828', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.11651621013879776, 'timestamp': '2025-09-30 22:19:47.205881', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:47.263010', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.14383465051651, 'timestamp': '2025-09-30 22:19:47.265954', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:19:47.325761', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.09082543104887009, 'timestamp': '2025-09-30 22:19:47.332227', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:19:47.389757', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.20086219906806946, 'timestamp': '2025-09-30 22:19:47.394092', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:19:47.452021', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.09443747252225876, 'timestamp': '2025-09-30 22:19:47.459350', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:20:01.262912', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 11846.24102051335, 'timestamp': '2025-09-30 22:20:01.280295', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:01.338419', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.14470161497592926, 'timestamp': '2025-09-30 22:20:01.342677', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:01.408204', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.1322217732667923, 'timestamp': '2025-09-30 22:20:01.418296', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:01.477455', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.17672087252140045, 'timestamp': '2025-09-30 22:20:01.488518', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:01.551730', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.18164795637130737, 'timestamp': '2025-09-30 22:20:01.559732', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:01.625967', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.2814655005931854, 'timestamp': '2025-09-30 22:20:01.635825', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:01.701431', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.11824293434619904, 'timestamp': '2025-09-30 22:20:01.712403', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:01.778363', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.15759055316448212, 'timestamp': '2025-09-30 22:20:01.781919', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:01.847628', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.18146519362926483, 'timestamp': '2025-09-30 22:20:01.860239', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:01.927103', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.1793324202299118, 'timestamp': '2025-09-30 22:20:01.937530', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:02.010144', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.1744956225156784, 'timestamp': '2025-09-30 22:20:02.019453', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.078622', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.22602635622024536, 'timestamp': '2025-09-30 22:20:02.085791', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.150373', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.1594018191099167, 'timestamp': '2025-09-30 22:20:02.156844', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.214111', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.16790740191936493, 'timestamp': '2025-09-30 22:20:02.217240', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:02.275293', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.07314053922891617, 'timestamp': '2025-09-30 22:20:02.284842', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:02.342577', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.1740506887435913, 'timestamp': '2025-09-30 22:20:02.346187', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:02.402975', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.2238846868276596, 'timestamp': '2025-09-30 22:20:02.409960', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.466564', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.13565388321876526, 'timestamp': '2025-09-30 22:20:02.476197', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:02.549453', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.10158194601535797, 'timestamp': '2025-09-30 22:20:02.553374', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.610254', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.0843503400683403, 'timestamp': '2025-09-30 22:20:02.614037', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:02.671275', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.14895562827587128, 'timestamp': '2025-09-30 22:20:02.682672', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:02.743603', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.13705909252166748, 'timestamp': '2025-09-30 22:20:02.748857', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.808713', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.1962699443101883, 'timestamp': '2025-09-30 22:20:02.811806', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:02.877113', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.1698208898305893, 'timestamp': '2025-09-30 22:20:02.879461', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:02.936180', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.1324155628681183, 'timestamp': '2025-09-30 22:20:02.943204', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.001835', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.14909255504608154, 'timestamp': '2025-09-30 22:20:03.004858', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.069878', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.21694006025791168, 'timestamp': '2025-09-30 22:20:03.072846', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:03.132188', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.13768623769283295, 'timestamp': '2025-09-30 22:20:03.141231', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.206114', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.19928723573684692, 'timestamp': '2025-09-30 22:20:03.215639', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:03.271821', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.2925710678100586, 'timestamp': '2025-09-30 22:20:03.274864', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:03.337064', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.19186067581176758, 'timestamp': '2025-09-30 22:20:03.346149', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:03.409846', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.14250488579273224, 'timestamp': '2025-09-30 22:20:03.413215', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:03.472170', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.17765282094478607, 'timestamp': '2025-09-30 22:20:03.483595', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.559706', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.18872594833374023, 'timestamp': '2025-09-30 22:20:03.566077', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.628717', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.24064581096172333, 'timestamp': '2025-09-30 22:20:03.631418', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.688392', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.3194248378276825, 'timestamp': '2025-09-30 22:20:03.694413', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.750696', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.15734536945819855, 'timestamp': '2025-09-30 22:20:03.761923', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.826670', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.18344658613204956, 'timestamp': '2025-09-30 22:20:03.842684', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:03.911112', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.2356909215450287, 'timestamp': '2025-09-30 22:20:03.917527', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:03.980582', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.1690947562456131, 'timestamp': '2025-09-30 22:20:03.989165', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.050867', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.16122792661190033, 'timestamp': '2025-09-30 22:20:04.061959', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.124111', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.17464816570281982, 'timestamp': '2025-09-30 22:20:04.128624', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:04.188959', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.16072379052639008, 'timestamp': '2025-09-30 22:20:04.198754', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.259583', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.23061503469944, 'timestamp': '2025-09-30 22:20:04.262293', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:04.321248', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.21136634051799774, 'timestamp': '2025-09-30 22:20:04.329822', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:04.388175', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.14494840800762177, 'timestamp': '2025-09-30 22:20:04.397709', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:04.459917', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.14066065847873688, 'timestamp': '2025-09-30 22:20:04.463164', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.521319', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.24227744340896606, 'timestamp': '2025-09-30 22:20:04.524429', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:04.581705', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.16682328283786774, 'timestamp': '2025-09-30 22:20:04.589479', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.646252', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.128479465842247, 'timestamp': '2025-09-30 22:20:04.650253', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:04.706997', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.29278960824012756, 'timestamp': '2025-09-30 22:20:04.710296', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:04.766929', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.22340211272239685, 'timestamp': '2025-09-30 22:20:04.771490', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.828074', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.17569483816623688, 'timestamp': '2025-09-30 22:20:04.834524', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.902442', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.1602887511253357, 'timestamp': '2025-09-30 22:20:04.905344', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:04.967550', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.19134671986103058, 'timestamp': '2025-09-30 22:20:04.970522', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:05.028847', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.09875286370515823, 'timestamp': '2025-09-30 22:20:05.032381', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:05.093469', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.24346372485160828, 'timestamp': '2025-09-30 22:20:05.100156', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:05.161366', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.16688252985477448, 'timestamp': '2025-09-30 22:20:05.164320', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:05.221633', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.14110656082630157, 'timestamp': '2025-09-30 22:20:05.224520', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:05.281481', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.1271946132183075, 'timestamp': '2025-09-30 22:20:05.284734', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:05.342842', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.05483338236808777, 'timestamp': '2025-09-30 22:20:05.349443', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:05.405654', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.11497368663549423, 'timestamp': '2025-09-30 22:20:05.409490', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:05.467376', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.20978491008281708, 'timestamp': '2025-09-30 22:20:05.473194', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:05.530752', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.17294418811798096, 'timestamp': '2025-09-30 22:20:05.543772', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:05.607173', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.18706202507019043, 'timestamp': '2025-09-30 22:20:05.613392', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:05.669500', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.15072524547576904, 'timestamp': '2025-09-30 22:20:05.672016', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:05.730901', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.25804224610328674, 'timestamp': '2025-09-30 22:20:05.733578', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:05.793612', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.16568386554718018, 'timestamp': '2025-09-30 22:20:05.797180', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:05.853784', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.10776659101247787, 'timestamp': '2025-09-30 22:20:05.860505', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:05.917666', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.1582677662372589, 'timestamp': '2025-09-30 22:20:05.925553', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:05.982578', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.1854148656129837, 'timestamp': '2025-09-30 22:20:05.987671', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:06.049715', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.10423924028873444, 'timestamp': '2025-09-30 22:20:06.052300', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:06.110747', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.14007017016410828, 'timestamp': '2025-09-30 22:20:06.117386', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.176178', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.14987638592720032, 'timestamp': '2025-09-30 22:20:06.179714', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:06.239599', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.08290747553110123, 'timestamp': '2025-09-30 22:20:06.243203', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.299902', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.15909895300865173, 'timestamp': '2025-09-30 22:20:06.307364', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:06.365576', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.1180478185415268, 'timestamp': '2025-09-30 22:20:06.373087', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:06.435273', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.19682376086711884, 'timestamp': '2025-09-30 22:20:06.439109', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.496704', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.09820058196783066, 'timestamp': '2025-09-30 22:20:06.499759', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:06.557585', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.13699759542942047, 'timestamp': '2025-09-30 22:20:06.560888', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.617052', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.14824321866035461, 'timestamp': '2025-09-30 22:20:06.623758', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:06.680884', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.22298918664455414, 'timestamp': '2025-09-30 22:20:06.683287', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.747664', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.12773726880550385, 'timestamp': '2025-09-30 22:20:06.763085', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:06.845870', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.15492315590381622, 'timestamp': '2025-09-30 22:20:06.874367', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:06.954748', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.17375734448432922, 'timestamp': '2025-09-30 22:20:06.986524', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.073223', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.12231036275625229, 'timestamp': '2025-09-30 22:20:07.102618', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.179297', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.0939478725194931, 'timestamp': '2025-09-30 22:20:07.204077', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:07.287872', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.10808144509792328, 'timestamp': '2025-09-30 22:20:07.312428', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.391667', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.1300405114889145, 'timestamp': '2025-09-30 22:20:07.405905', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:07.470670', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.1542547345161438, 'timestamp': '2025-09-30 22:20:07.488048', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.560788', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.14030498266220093, 'timestamp': '2025-09-30 22:20:07.567122', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:07.631650', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.16248585283756256, 'timestamp': '2025-09-30 22:20:07.652508', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.742401', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.25040340423583984, 'timestamp': '2025-09-30 22:20:07.760831', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.831474', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.15788552165031433, 'timestamp': '2025-09-30 22:20:07.847131', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:07.916249', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.18473710119724274, 'timestamp': '2025-09-30 22:20:07.928350', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:07.991762', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.14862622320652008, 'timestamp': '2025-09-30 22:20:08.006648', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:08.075178', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.21330247819423676, 'timestamp': '2025-09-30 22:20:08.092691', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:08.161458', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.14936891198158264, 'timestamp': '2025-09-30 22:20:08.195380', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:08.259626', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.19068197906017303, 'timestamp': '2025-09-30 22:20:08.280149', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:08.364430', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.21464872360229492, 'timestamp': '2025-09-30 22:20:08.389185', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:08.458147', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.19220300018787384, 'timestamp': '2025-09-30 22:20:08.469134', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:08.530331', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.1300922930240631, 'timestamp': '2025-09-30 22:20:08.547496', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:08.626716', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.3052196204662323, 'timestamp': '2025-09-30 22:20:08.646180', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:08.708204', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.16615550220012665, 'timestamp': '2025-09-30 22:20:08.714249', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:08.782785', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.2612348794937134, 'timestamp': '2025-09-30 22:20:08.789513', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:08.857193', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.11192705482244492, 'timestamp': '2025-09-30 22:20:08.866141', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:08.926979', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.19733163714408875, 'timestamp': '2025-09-30 22:20:08.930502', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:08.997226', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.11796356737613678, 'timestamp': '2025-09-30 22:20:09.000640', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:09.059101', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.10258235782384872, 'timestamp': '2025-09-30 22:20:09.065538', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:09.124153', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.1925199031829834, 'timestamp': '2025-09-30 22:20:09.126766', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:09.185130', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.12287517637014389, 'timestamp': '2025-09-30 22:20:09.188865', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:09.249981', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.1756330132484436, 'timestamp': '2025-09-30 22:20:09.254457', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:09.312391', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.1311371773481369, 'timestamp': '2025-09-30 22:20:09.318402', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:09.375472', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.18529246747493744, 'timestamp': '2025-09-30 22:20:09.377809', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:09.434035', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.13442201912403107, 'timestamp': '2025-09-30 22:20:09.436970', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:09.496981', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.1475699543952942, 'timestamp': '2025-09-30 22:20:09.499982', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:09.558321', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.14152748882770538, 'timestamp': '2025-09-30 22:20:09.564711', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:09.623314', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.2280946671962738, 'timestamp': '2025-09-30 22:20:09.626148', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:09.687754', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.2037132829427719, 'timestamp': '2025-09-30 22:20:09.689968', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:09.747160', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.15191537141799927, 'timestamp': '2025-09-30 22:20:09.749511', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:09.806048', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.0802689641714096, 'timestamp': '2025-09-30 22:20:09.811952', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:09.873254', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.13717429339885712, 'timestamp': '2025-09-30 22:20:09.876229', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:09.932208', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.17278648912906647, 'timestamp': '2025-09-30 22:20:09.935024', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:09.997105', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.169736847281456, 'timestamp': '2025-09-30 22:20:09.999810', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:10.057544', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.12075261026620865, 'timestamp': '2025-09-30 22:20:10.064252', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:10.123703', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.19824348390102386, 'timestamp': '2025-09-30 22:20:10.127121', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:10.188316', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.13747331500053406, 'timestamp': '2025-09-30 22:20:10.198202', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:10.261249', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.10796438157558441, 'timestamp': '2025-09-30 22:20:10.264480', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:10.323693', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.23324964940547943, 'timestamp': '2025-09-30 22:20:10.329970', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:10.388022', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.1538366824388504, 'timestamp': '2025-09-30 22:20:10.392906', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:10.450594', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.14892905950546265, 'timestamp': '2025-09-30 22:20:10.453226', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:10.514784', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.16564029455184937, 'timestamp': '2025-09-30 22:20:10.519483', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:10.577181', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.13676264882087708, 'timestamp': '2025-09-30 22:20:10.583864', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:10.667496', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.12340894341468811, 'timestamp': '2025-09-30 22:20:10.670683', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:10.747588', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.20661602914333344, 'timestamp': '2025-09-30 22:20:10.753599', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:10.827207', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.15709733963012695, 'timestamp': '2025-09-30 22:20:10.838289', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:10.895783', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.14259576797485352, 'timestamp': '2025-09-30 22:20:10.902437', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:10.969556', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.14476218819618225, 'timestamp': '2025-09-30 22:20:10.983992', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:11.049274', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.15347109735012054, 'timestamp': '2025-09-30 22:20:11.054897', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:11.124729', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.08123112469911575, 'timestamp': '2025-09-30 22:20:11.127155', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:11.185667', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.12922105193138123, 'timestamp': '2025-09-30 22:20:11.206770', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:11.272910', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.15844465792179108, 'timestamp': '2025-09-30 22:20:11.280087', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:11.347529', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.08564139902591705, 'timestamp': '2025-09-30 22:20:11.360383', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:11.428788', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.15573084354400635, 'timestamp': '2025-09-30 22:20:11.434441', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:11.495960', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.14479172229766846, 'timestamp': '2025-09-30 22:20:11.510138', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:11.571793', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.16183502972126007, 'timestamp': '2025-09-30 22:20:11.583166', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:11.646726', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.10324980318546295, 'timestamp': '2025-09-30 22:20:11.649853', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:11.713288', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.17214293777942657, 'timestamp': '2025-09-30 22:20:11.730464', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:11.798950', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.16794882714748383, 'timestamp': '2025-09-30 22:20:11.805309', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:11.889470', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.11849658191204071, 'timestamp': '2025-09-30 22:20:11.893380', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:11.950475', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.14988134801387787, 'timestamp': '2025-09-30 22:20:11.954918', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.016060', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.13015589118003845, 'timestamp': '2025-09-30 22:20:12.018585', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:12.074281', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.11193815618753433, 'timestamp': '2025-09-30 22:20:12.081881', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.139895', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.11157214641571045, 'timestamp': '2025-09-30 22:20:12.148592', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.205007', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.20664151012897491, 'timestamp': '2025-09-30 22:20:12.207926', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.264551', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.17684437334537506, 'timestamp': '2025-09-30 22:20:12.266934', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:12.324042', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.17131313681602478, 'timestamp': '2025-09-30 22:20:12.330326', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.385406', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.2173113375902176, 'timestamp': '2025-09-30 22:20:12.387789', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.447090', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.15093064308166504, 'timestamp': '2025-09-30 22:20:12.449879', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.507071', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.1601269692182541, 'timestamp': '2025-09-30 22:20:12.509273', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.565248', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.11229517310857773, 'timestamp': '2025-09-30 22:20:12.579256', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:12.634775', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.07558925449848175, 'timestamp': '2025-09-30 22:20:12.636976', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:12.692772', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.10503491014242172, 'timestamp': '2025-09-30 22:20:12.695360', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:12.752875', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.12885358929634094, 'timestamp': '2025-09-30 22:20:12.756653', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:12.813129', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.11891186237335205, 'timestamp': '2025-09-30 22:20:12.823065', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:12.882046', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.15949878096580505, 'timestamp': '2025-09-30 22:20:12.884344', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:12.941535', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.17899379134178162, 'timestamp': '2025-09-30 22:20:12.945924', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:13.005583', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.1241321787238121, 'timestamp': '2025-09-30 22:20:13.008413', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.066310', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.1158612072467804, 'timestamp': '2025-09-30 22:20:13.072466', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.128869', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.15963445603847504, 'timestamp': '2025-09-30 22:20:13.131241', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:13.187271', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.15793699026107788, 'timestamp': '2025-09-30 22:20:13.193004', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.254341', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.08270246535539627, 'timestamp': '2025-09-30 22:20:13.257110', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.318039', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.11750484257936478, 'timestamp': '2025-09-30 22:20:13.325164', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.383521', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.31792861223220825, 'timestamp': '2025-09-30 22:20:13.386748', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:13.448859', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.182082399725914, 'timestamp': '2025-09-30 22:20:13.456754', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:13.523088', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.1866508275270462, 'timestamp': '2025-09-30 22:20:13.526450', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:13.585946', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.162970170378685, 'timestamp': '2025-09-30 22:20:13.593126', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:13.650036', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.14345765113830566, 'timestamp': '2025-09-30 22:20:13.655108', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:13.713682', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.17369067668914795, 'timestamp': '2025-09-30 22:20:13.717044', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:13.774250', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.22239425778388977, 'timestamp': '2025-09-30 22:20:13.778375', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.838362', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.29722949862480164, 'timestamp': '2025-09-30 22:20:13.848536', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:13.904701', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.19396626949310303, 'timestamp': '2025-09-30 22:20:13.907399', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:13.963467', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.17834916710853577, 'timestamp': '2025-09-30 22:20:13.977077', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:14.033126', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.20113235712051392, 'timestamp': '2025-09-30 22:20:14.038378', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:14.098163', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.14812929928302765, 'timestamp': '2025-09-30 22:20:14.104604', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:14.160481', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.11114885658025742, 'timestamp': '2025-09-30 22:20:14.163537', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:14.223085', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.1456621140241623, 'timestamp': '2025-09-30 22:20:14.234909', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:14.294693', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.2003660500049591, 'timestamp': '2025-09-30 22:20:14.297534', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:14.364721', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.16483697295188904, 'timestamp': '2025-09-30 22:20:14.370623', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:14.426875', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.1642015129327774, 'timestamp': '2025-09-30 22:20:14.429518', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:14.487383', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.09444383531808853, 'timestamp': '2025-09-30 22:20:14.493323', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:14.551058', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.2109876424074173, 'timestamp': '2025-09-30 22:20:14.553592', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:14.610218', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.14808417856693268, 'timestamp': '2025-09-30 22:20:14.619752', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:14.681647', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.17476028203964233, 'timestamp': '2025-09-30 22:20:14.688139', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:14.749507', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.22067098319530487, 'timestamp': '2025-09-30 22:20:14.755845', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:14.813326', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.2332415133714676, 'timestamp': '2025-09-30 22:20:14.817883', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:14.889340', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.10888087004423141, 'timestamp': '2025-09-30 22:20:14.895404', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:14.951419', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.17390309274196625, 'timestamp': '2025-09-30 22:20:14.954219', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.011162', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.21440544724464417, 'timestamp': '2025-09-30 22:20:15.015140', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.071958', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.1662459820508957, 'timestamp': '2025-09-30 22:20:15.074489', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.130622', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.12951859831809998, 'timestamp': '2025-09-30 22:20:15.138864', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:15.200635', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.1789240539073944, 'timestamp': '2025-09-30 22:20:15.202965', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.258681', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.19076430797576904, 'timestamp': '2025-09-30 22:20:15.260911', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.326717', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.17878083884716034, 'timestamp': '2025-09-30 22:20:15.329440', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:15.394925', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.2205568552017212, 'timestamp': '2025-09-30 22:20:15.404396', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.460844', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.17157289385795593, 'timestamp': '2025-09-30 22:20:15.463431', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.521717', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.0917077362537384, 'timestamp': '2025-09-30 22:20:15.523944', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.580527', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.11305447667837143, 'timestamp': '2025-09-30 22:20:15.583278', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:15.639891', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.12153282761573792, 'timestamp': '2025-09-30 22:20:15.646303', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.711333', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.26329150795936584, 'timestamp': '2025-09-30 22:20:15.715499', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:15.773868', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.22276924550533295, 'timestamp': '2025-09-30 22:20:15.776777', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:15.837203', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.06445278972387314, 'timestamp': '2025-09-30 22:20:15.840477', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.906063', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.14019440114498138, 'timestamp': '2025-09-30 22:20:15.911841', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:15.968242', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.10604995489120483, 'timestamp': '2025-09-30 22:20:15.974343', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:16.032351', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.20741595327854156, 'timestamp': '2025-09-30 22:20:16.034467', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:16.090767', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.13836288452148438, 'timestamp': '2025-09-30 22:20:16.093353', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:16.152012', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19970643520355225, 'timestamp': '2025-09-30 22:20:16.163552', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:16.219862', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.13934111595153809, 'timestamp': '2025-09-30 22:20:16.222079', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.279210', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.14327096939086914, 'timestamp': '2025-09-30 22:20:16.282633', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.342794', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.08430419117212296, 'timestamp': '2025-09-30 22:20:16.349207', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:16.410356', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.10653016716241837, 'timestamp': '2025-09-30 22:20:16.416655', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:16.472543', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.1855834275484085, 'timestamp': '2025-09-30 22:20:16.475037', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.532297', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.13909168541431427, 'timestamp': '2025-09-30 22:20:16.536614', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:16.605770', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.15216991305351257, 'timestamp': '2025-09-30 22:20:16.608738', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.669549', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.16510319709777832, 'timestamp': '2025-09-30 22:20:16.680191', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:16.737380', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.1395951211452484, 'timestamp': '2025-09-30 22:20:16.744142', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.804908', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.16286982595920563, 'timestamp': '2025-09-30 22:20:16.809602', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.867057', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07406799495220184, 'timestamp': '2025-09-30 22:20:16.870134', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:16.930695', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.1814151406288147, 'timestamp': '2025-09-30 22:20:16.941240', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:16.998276', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.13761194050312042, 'timestamp': '2025-09-30 22:20:17.003214', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:17.061265', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.21244053542613983, 'timestamp': '2025-09-30 22:20:17.065267', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:17.127801', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.1360279619693756, 'timestamp': '2025-09-30 22:20:17.130634', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.187267', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.14904992282390594, 'timestamp': '2025-09-30 22:20:17.194117', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:17.253598', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.12044180184602737, 'timestamp': '2025-09-30 22:20:17.255858', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:17.313800', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.19879458844661713, 'timestamp': '2025-09-30 22:20:17.323784', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:17.381993', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.13882704079151154, 'timestamp': '2025-09-30 22:20:17.384422', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.443576', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.11251220852136612, 'timestamp': '2025-09-30 22:20:17.451818', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:17.509685', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.16298457980155945, 'timestamp': '2025-09-30 22:20:17.514083', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.570908', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.13100385665893555, 'timestamp': '2025-09-30 22:20:17.573872', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.630925', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.2068699151277542, 'timestamp': '2025-09-30 22:20:17.634844', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:17.691573', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.12513038516044617, 'timestamp': '2025-09-30 22:20:17.697960', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:20:17.760478', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.194819375872612, 'timestamp': '2025-09-30 22:20:17.763632', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.821605', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.14654500782489777, 'timestamp': '2025-09-30 22:20:17.825120', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:17.882228', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.06977201998233795, 'timestamp': '2025-09-30 22:20:17.885569', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:17.942750', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.1865260899066925, 'timestamp': '2025-09-30 22:20:17.949496', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.006065', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.11572235077619553, 'timestamp': '2025-09-30 22:20:18.008665', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.065160', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.15404249727725983, 'timestamp': '2025-09-30 22:20:18.068472', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.127203', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.15182006359100342, 'timestamp': '2025-09-30 22:20:18.130400', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:18.188043', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.14533711969852448, 'timestamp': '2025-09-30 22:20:18.195267', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:18.253522', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.11418098211288452, 'timestamp': '2025-09-30 22:20:18.256995', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.318363', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.162485733628273, 'timestamp': '2025-09-30 22:20:18.321890', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:18.381714', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.10184821486473083, 'timestamp': '2025-09-30 22:20:18.385220', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.442369', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.132611945271492, 'timestamp': '2025-09-30 22:20:18.452562', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:18.509075', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.13541974127292633, 'timestamp': '2025-09-30 22:20:18.512636', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:18.569095', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.1831589639186859, 'timestamp': '2025-09-30 22:20:18.571777', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:18.628884', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.2596725821495056, 'timestamp': '2025-09-30 22:20:18.631891', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:18.691438', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.18114010989665985, 'timestamp': '2025-09-30 22:20:18.697860', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:18.754430', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.16686305403709412, 'timestamp': '2025-09-30 22:20:18.758469', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:18.815877', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.23441432416439056, 'timestamp': '2025-09-30 22:20:18.819011', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:18.880680', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.20884376764297485, 'timestamp': '2025-09-30 22:20:18.883815', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:18.940395', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.13457249104976654, 'timestamp': '2025-09-30 22:20:18.954852', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:19.011048', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.18066686391830444, 'timestamp': '2025-09-30 22:20:19.015183', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:19.074403', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.12774640321731567, 'timestamp': '2025-09-30 22:20:19.077478', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:19.135378', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.20809869468212128, 'timestamp': '2025-09-30 22:20:19.145324', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:19.203245', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.16992591321468353, 'timestamp': '2025-09-30 22:20:19.211142', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:19.267847', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.24834172427654266, 'timestamp': '2025-09-30 22:20:19.272716', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:19.333901', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.16170427203178406, 'timestamp': '2025-09-30 22:20:19.336673', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:19.393151', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.17944318056106567, 'timestamp': '2025-09-30 22:20:19.396071', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:19.460136', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.24293537437915802, 'timestamp': '2025-09-30 22:20:19.466587', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:19.523569', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.15017467737197876, 'timestamp': '2025-09-30 22:20:19.526381', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:19.583343', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.18588893115520477, 'timestamp': '2025-09-30 22:20:19.585597', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:19.645618', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.15609170496463776, 'timestamp': '2025-09-30 22:20:19.651872', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:19.708098', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.11012860387563705, 'timestamp': '2025-09-30 22:20:19.714294', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:19.770311', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.1430259644985199, 'timestamp': '2025-09-30 22:20:19.774023', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:19.830878', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.17903773486614227, 'timestamp': '2025-09-30 22:20:19.833141', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:19.890666', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.18074874579906464, 'timestamp': '2025-09-30 22:20:19.893036', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:19.949395', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.15880802273750305, 'timestamp': '2025-09-30 22:20:19.955645', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:20.013958', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.1503794938325882, 'timestamp': '2025-09-30 22:20:20.016129', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:20.075383', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.1797330379486084, 'timestamp': '2025-09-30 22:20:20.079139', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:20.139336', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.1292976438999176, 'timestamp': '2025-09-30 22:20:20.143120', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:20.202057', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.08495302498340607, 'timestamp': '2025-09-30 22:20:20.208887', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:20.266847', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.15865136682987213, 'timestamp': '2025-09-30 22:20:20.270298', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:20.329652', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.09902019053697586, 'timestamp': '2025-09-30 22:20:20.332513', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:20.390640', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.173916295170784, 'timestamp': '2025-09-30 22:20:20.397312', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:20.458796', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.17912648618221283, 'timestamp': '2025-09-30 22:20:20.465767', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:20.524559', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.10301005840301514, 'timestamp': '2025-09-30 22:20:20.536206', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:20.594661', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.08789179474115372, 'timestamp': '2025-09-30 22:20:20.597231', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:20.655748', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.16509447991847992, 'timestamp': '2025-09-30 22:20:20.658616', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:20.720070', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.1748245358467102, 'timestamp': '2025-09-30 22:20:20.727004', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:20.787654', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.22776716947555542, 'timestamp': '2025-09-30 22:20:20.791981', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:20.849776', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.1748114824295044, 'timestamp': '2025-09-30 22:20:20.853272', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:20.911864', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.12996512651443481, 'timestamp': '2025-09-30 22:20:20.915086', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:20.973463', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.21390938758850098, 'timestamp': '2025-09-30 22:20:20.979607', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:21.037307', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.04987679049372673, 'timestamp': '2025-09-30 22:20:21.041261', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:21.100799', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.16154745221138, 'timestamp': '2025-09-30 22:20:21.110959', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:21.171082', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.11570581048727036, 'timestamp': '2025-09-30 22:20:21.174001', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:21.232949', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.1586771309375763, 'timestamp': '2025-09-30 22:20:21.239253', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:21.296471', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.28920644521713257, 'timestamp': '2025-09-30 22:20:21.302034', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.359267', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.13537444174289703, 'timestamp': '2025-09-30 22:20:21.362290', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:21.425362', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.18219012022018433, 'timestamp': '2025-09-30 22:20:21.431180', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.489381', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.11583757400512695, 'timestamp': '2025-09-30 22:20:21.495973', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.556961', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.09612991660833359, 'timestamp': '2025-09-30 22:20:21.560194', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.617725', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.15544696152210236, 'timestamp': '2025-09-30 22:20:21.622563', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.681662', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.2276136428117752, 'timestamp': '2025-09-30 22:20:21.684519', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:21.742665', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.1743338257074356, 'timestamp': '2025-09-30 22:20:21.749883', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:21.811524', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.1986486166715622, 'timestamp': '2025-09-30 22:20:21.814008', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:21.880078', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.1427392214536667, 'timestamp': '2025-09-30 22:20:21.883113', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:21.940730', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.10802675038576126, 'timestamp': '2025-09-30 22:20:21.944632', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:22.004470', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.21954180300235748, 'timestamp': '2025-09-30 22:20:22.017791', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.075127', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.14283767342567444, 'timestamp': '2025-09-30 22:20:22.081548', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.139028', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.14436186850070953, 'timestamp': '2025-09-30 22:20:22.142758', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.200465', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.10310132801532745, 'timestamp': '2025-09-30 22:20:22.203025', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:22.259828', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.30676549673080444, 'timestamp': '2025-09-30 22:20:22.266441', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:22.330692', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.1659603863954544, 'timestamp': '2025-09-30 22:20:22.337957', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.394718', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.2191101461648941, 'timestamp': '2025-09-30 22:20:22.400981', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.457632', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.18708392977714539, 'timestamp': '2025-09-30 22:20:22.464094', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:22.525675', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.1522529423236847, 'timestamp': '2025-09-30 22:20:22.532236', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:22.589197', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.1356067955493927, 'timestamp': '2025-09-30 22:20:22.603244', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:22.661088', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.1885824203491211, 'timestamp': '2025-09-30 22:20:22.663840', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:22.721358', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.14581920206546783, 'timestamp': '2025-09-30 22:20:22.734132', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:22.790953', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.14177528023719788, 'timestamp': '2025-09-30 22:20:22.797182', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:22.854204', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.1255146563053131, 'timestamp': '2025-09-30 22:20:22.856766', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:22.913677', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.29012978076934814, 'timestamp': '2025-09-30 22:20:22.916100', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:22.973956', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.23531991243362427, 'timestamp': '2025-09-30 22:20:22.976840', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:23.040207', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.13652804493904114, 'timestamp': '2025-09-30 22:20:23.047383', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:23.105568', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.1910969316959381, 'timestamp': '2025-09-30 22:20:23.108205', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:23.164686', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.12388100475072861, 'timestamp': '2025-09-30 22:20:23.166926', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.223679', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.19406364858150482, 'timestamp': '2025-09-30 22:20:23.228351', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.289164', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.18973615765571594, 'timestamp': '2025-09-30 22:20:23.295215', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.352587', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.14292669296264648, 'timestamp': '2025-09-30 22:20:23.357856', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:23.414800', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.2196139246225357, 'timestamp': '2025-09-30 22:20:23.417334', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.476446', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.22334469854831696, 'timestamp': '2025-09-30 22:20:23.480091', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:23.538152', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.15786020457744598, 'timestamp': '2025-09-30 22:20:23.543964', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:23.600320', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.08330632001161575, 'timestamp': '2025-09-30 22:20:23.602981', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:23.659758', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.13952168822288513, 'timestamp': '2025-09-30 22:20:23.663024', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.720565', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.09940207749605179, 'timestamp': '2025-09-30 22:20:23.723752', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.784349', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.07786095142364502, 'timestamp': '2025-09-30 22:20:23.791939', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.849124', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.09725701063871384, 'timestamp': '2025-09-30 22:20:23.854681', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.912568', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.07553548365831375, 'timestamp': '2025-09-30 22:20:23.919187', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:23.977374', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.18782585859298706, 'timestamp': '2025-09-30 22:20:23.982085', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:24.044571', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.1736723780632019, 'timestamp': '2025-09-30 22:20:24.052804', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:24.111752', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.22496666014194489, 'timestamp': '2025-09-30 22:20:24.118215', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:24.178459', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.12228460609912872, 'timestamp': '2025-09-30 22:20:24.180999', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:24.246004', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.18875816464424133, 'timestamp': '2025-09-30 22:20:24.248907', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:24.305968', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.11685193330049515, 'timestamp': '2025-09-30 22:20:24.312673', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:24.376501', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.12136457115411758, 'timestamp': '2025-09-30 22:20:24.378851', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:24.435983', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.1726582795381546, 'timestamp': '2025-09-30 22:20:24.438270', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:24.495602', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.16234725713729858, 'timestamp': '2025-09-30 22:20:24.497774', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:24.558153', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.34302961826324463, 'timestamp': '2025-09-30 22:20:24.565803', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:24.622781', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.20252448320388794, 'timestamp': '2025-09-30 22:20:24.625042', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:24.694112', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.14817620813846588, 'timestamp': '2025-09-30 22:20:24.697770', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:24.758498', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.07291653007268906, 'timestamp': '2025-09-30 22:20:24.762012', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:24.821749', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.16794081032276154, 'timestamp': '2025-09-30 22:20:24.827975', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:24.884402', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.23270848393440247, 'timestamp': '2025-09-30 22:20:24.886940', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:24.944339', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.1288217306137085, 'timestamp': '2025-09-30 22:20:24.946899', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:25.004650', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.1562301218509674, 'timestamp': '2025-09-30 22:20:25.008243', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.068618', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.21100609004497528, 'timestamp': '2025-09-30 22:20:25.074694', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.131388', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.21267521381378174, 'timestamp': '2025-09-30 22:20:25.134203', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:25.191340', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.1974152773618698, 'timestamp': '2025-09-30 22:20:25.201159', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:25.259159', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.16293419897556305, 'timestamp': '2025-09-30 22:20:25.264312', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:25.322426', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.16463859379291534, 'timestamp': '2025-09-30 22:20:25.329129', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:25.385371', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.14610238373279572, 'timestamp': '2025-09-30 22:20:25.388442', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:25.449804', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.11188206076622009, 'timestamp': '2025-09-30 22:20:25.455869', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:25.512542', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.13286520540714264, 'timestamp': '2025-09-30 22:20:25.516768', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.580727', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.12536078691482544, 'timestamp': '2025-09-30 22:20:25.587692', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.645455', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.13097694516181946, 'timestamp': '2025-09-30 22:20:25.648330', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.711124', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.1717231273651123, 'timestamp': '2025-09-30 22:20:25.715050', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:25.772310', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.2002955824136734, 'timestamp': '2025-09-30 22:20:25.779342', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:25.837605', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.0805111676454544, 'timestamp': '2025-09-30 22:20:25.844539', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:25.901327', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.26161065697669983, 'timestamp': '2025-09-30 22:20:25.904433', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:25.961820', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.13253828883171082, 'timestamp': '2025-09-30 22:20:25.965111', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:26.022758', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.10743683576583862, 'timestamp': '2025-09-30 22:20:26.025653', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:26.082480', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.0850311890244484, 'timestamp': '2025-09-30 22:20:26.088772', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:26.145587', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.1267162710428238, 'timestamp': '2025-09-30 22:20:26.148124', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:26.204776', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.13859383761882782, 'timestamp': '2025-09-30 22:20:26.207189', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:26.264235', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.11478646844625473, 'timestamp': '2025-09-30 22:20:26.266877', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:26.325212', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.15237799286842346, 'timestamp': '2025-09-30 22:20:26.331729', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:26.388875', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.11288373172283173, 'timestamp': '2025-09-30 22:20:26.391684', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:26.449628', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.1855422407388687, 'timestamp': '2025-09-30 22:20:26.452237', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:26.509976', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.11991480737924576, 'timestamp': '2025-09-30 22:20:26.512294', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:20:26.569787', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.13896511495113373, 'timestamp': '2025-09-30 22:20:26.576861', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:26.633801', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.15030881762504578, 'timestamp': '2025-09-30 22:20:26.637032', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:26.696370', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.1354088932275772, 'timestamp': '2025-09-30 22:20:26.698798', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:26.756278', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.22033409774303436, 'timestamp': '2025-09-30 22:20:26.758742', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:26.816117', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.14509426057338715, 'timestamp': '2025-09-30 22:20:26.822212', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:26.880271', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.1487962007522583, 'timestamp': '2025-09-30 22:20:26.882966', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:26.940839', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.12484317272901535, 'timestamp': '2025-09-30 22:20:26.943256', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:27.000667', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.256648451089859, 'timestamp': '2025-09-30 22:20:27.002913', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:20:27.059618', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.1510058045387268, 'timestamp': '2025-09-30 22:20:27.065932', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:27.123042', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.14709851145744324, 'timestamp': '2025-09-30 22:20:27.125631', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.183653', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.1791243702173233, 'timestamp': '2025-09-30 22:20:27.185943', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:27.243347', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.1858205795288086, 'timestamp': '2025-09-30 22:20:27.245900', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:27.305571', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.29140031337738037, 'timestamp': '2025-09-30 22:20:27.312285', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.375182', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.15673573315143585, 'timestamp': '2025-09-30 22:20:27.377405', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:27.439070', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.10709655284881592, 'timestamp': '2025-09-30 22:20:27.442434', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.499764', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.12009741365909576, 'timestamp': '2025-09-30 22:20:27.506746', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:27.568780', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.14907068014144897, 'timestamp': '2025-09-30 22:20:27.575683', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:27.631726', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.12620680034160614, 'timestamp': '2025-09-30 22:20:27.640710', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.697600', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.2398722618818283, 'timestamp': '2025-09-30 22:20:27.702692', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.761098', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.1105436235666275, 'timestamp': '2025-09-30 22:20:27.764498', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.825929', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.09433015435934067, 'timestamp': '2025-09-30 22:20:27.831877', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:27.887607', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.2302360087633133, 'timestamp': '2025-09-30 22:20:27.895918', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:27.965905', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.17892472445964813, 'timestamp': '2025-09-30 22:20:27.968317', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:28.026762', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.2529570758342743, 'timestamp': '2025-09-30 22:20:28.029114', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:28.086721', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.13097229599952698, 'timestamp': '2025-09-30 22:20:28.092746', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:28.149642', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.1581282913684845, 'timestamp': '2025-09-30 22:20:28.152478', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:28.209312', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.153749018907547, 'timestamp': '2025-09-30 22:20:28.213338', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:28.269985', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.2005881816148758, 'timestamp': '2025-09-30 22:20:28.272745', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:28.329246', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.21561786532402039, 'timestamp': '2025-09-30 22:20:28.335819', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-30 22:20:28.725776', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:28.791877', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.14550484716892242, 'timestamp': '2025-09-30 22:20:28.795235', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:28.860402', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.1762881726026535, 'timestamp': '2025-09-30 22:20:28.865157', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:28.925420', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.17443200945854187, 'timestamp': '2025-09-30 22:20:28.928331', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:28.987687', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.11822128295898438, 'timestamp': '2025-09-30 22:20:28.994766', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:29.052768', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.1388445794582367, 'timestamp': '2025-09-30 22:20:29.054978', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:29.127635', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.09680105000734329, 'timestamp': '2025-09-30 22:20:29.130916', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.189027', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.21464665234088898, 'timestamp': '2025-09-30 22:20:29.191370', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:29.250298', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.09393614530563354, 'timestamp': '2025-09-30 22:20:29.256625', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.316622', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.1300455629825592, 'timestamp': '2025-09-30 22:20:29.318999', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.375781', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.13952317833900452, 'timestamp': '2025-09-30 22:20:29.379572', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.437149', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.19498799741268158, 'timestamp': '2025-09-30 22:20:29.442349', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:29.501142', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.15590347349643707, 'timestamp': '2025-09-30 22:20:29.507651', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:29.565796', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.19633007049560547, 'timestamp': '2025-09-30 22:20:29.568278', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:29.642172', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.180964857339859, 'timestamp': '2025-09-30 22:20:29.645666', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:29.712082', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.12104327231645584, 'timestamp': '2025-09-30 22:20:29.721395', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.787935', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.18219870328903198, 'timestamp': '2025-09-30 22:20:29.797236', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.858316', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.15124811232089996, 'timestamp': '2025-09-30 22:20:29.860475', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:29.920740', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.09859948605298996, 'timestamp': '2025-09-30 22:20:29.923138', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:29.986470', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.10215985774993896, 'timestamp': '2025-09-30 22:20:29.990847', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.049612', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.22007635235786438, 'timestamp': '2025-09-30 22:20:30.056165', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.112293', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.18167291581630707, 'timestamp': '2025-09-30 22:20:30.117722', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.174704', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.12231814861297607, 'timestamp': '2025-09-30 22:20:30.177877', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.234442', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.11986305564641953, 'timestamp': '2025-09-30 22:20:30.239914', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.303083', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.16831673681735992, 'timestamp': '2025-09-30 22:20:30.310661', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.370482', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.14036762714385986, 'timestamp': '2025-09-30 22:20:30.374037', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.436684', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.14437726140022278, 'timestamp': '2025-09-30 22:20:30.439997', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.498791', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.10982373356819153, 'timestamp': '2025-09-30 22:20:30.501394', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.561154', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.15512561798095703, 'timestamp': '2025-09-30 22:20:30.568010', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.624692', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.18335244059562683, 'timestamp': '2025-09-30 22:20:30.628298', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:30.686388', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.12211066484451294, 'timestamp': '2025-09-30 22:20:30.689499', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.750243', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.0784202516078949, 'timestamp': '2025-09-30 22:20:30.753696', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:30.810359', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.15445318818092346, 'timestamp': '2025-09-30 22:20:30.816749', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.873455', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.18485954403877258, 'timestamp': '2025-09-30 22:20:30.877071', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:30.933624', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.17618848383426666, 'timestamp': '2025-09-30 22:20:30.936298', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:30.993324', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.21302303671836853, 'timestamp': '2025-09-30 22:20:30.995841', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.056872', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.23173409700393677, 'timestamp': '2025-09-30 22:20:31.063510', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:31.120797', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.19435715675354004, 'timestamp': '2025-09-30 22:20:31.126929', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:31.188218', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.1447351723909378, 'timestamp': '2025-09-30 22:20:31.190752', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.247951', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.19094930589199066, 'timestamp': '2025-09-30 22:20:31.251370', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.308709', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.11297252774238586, 'timestamp': '2025-09-30 22:20:31.317516', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:31.375558', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.16913479566574097, 'timestamp': '2025-09-30 22:20:31.378161', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.436193', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.14668352901935577, 'timestamp': '2025-09-30 22:20:31.440898', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:31.499974', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.21746774017810822, 'timestamp': '2025-09-30 22:20:31.502351', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.561908', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.13342414796352386, 'timestamp': '2025-09-30 22:20:31.568772', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:31.626840', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.13173317909240723, 'timestamp': '2025-09-30 22:20:31.629632', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:31.686233', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.15003196895122528, 'timestamp': '2025-09-30 22:20:31.688736', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.745941', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.11082440614700317, 'timestamp': '2025-09-30 22:20:31.748404', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.810308', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.182533398270607, 'timestamp': '2025-09-30 22:20:31.818681', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:31.875404', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.09701919555664062, 'timestamp': '2025-09-30 22:20:31.880737', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:31.937562', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.16893209517002106, 'timestamp': '2025-09-30 22:20:31.940688', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:31.998592', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.2728630304336548, 'timestamp': '2025-09-30 22:20:32.001560', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:32.067967', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.14635059237480164, 'timestamp': '2025-09-30 22:20:32.078205', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:32.134694', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.08513297140598297, 'timestamp': '2025-09-30 22:20:32.137598', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:32.196456', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.12482210248708725, 'timestamp': '2025-09-30 22:20:32.201098', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:32.258314', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.1554044783115387, 'timestamp': '2025-09-30 22:20:32.264796', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:32.321477', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.17776145040988922, 'timestamp': '2025-09-30 22:20:32.329380', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:32.393749', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.11235412210226059, 'timestamp': '2025-09-30 22:20:32.396645', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:32.454069', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.15708968043327332, 'timestamp': '2025-09-30 22:20:32.457878', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:20:46.209978', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 12428.821007830096, 'timestamp': '2025-09-30 22:20:46.223688', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.281900', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.32162535190582275, 'timestamp': '2025-09-30 22:20:46.285914', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:46.344050', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.18552939593791962, 'timestamp': '2025-09-30 22:20:46.351394', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.418287', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.19507423043251038, 'timestamp': '2025-09-30 22:20:46.421549', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:46.478554', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.1889737993478775, 'timestamp': '2025-09-30 22:20:46.481655', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.540319', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.07264963537454605, 'timestamp': '2025-09-30 22:20:46.543433', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:46.601454', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.0844334065914154, 'timestamp': '2025-09-30 22:20:46.607482', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.664065', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.24230824410915375, 'timestamp': '2025-09-30 22:20:46.666146', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:46.732867', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.08976013213396072, 'timestamp': '2025-09-30 22:20:46.735420', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:46.793313', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.1763637363910675, 'timestamp': '2025-09-30 22:20:46.796340', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.854043', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.1321975290775299, 'timestamp': '2025-09-30 22:20:46.860437', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:46.917600', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.17042450606822968, 'timestamp': '2025-09-30 22:20:46.920365', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:46.977713', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.13058054447174072, 'timestamp': '2025-09-30 22:20:46.986791', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:47.058673', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.22067496180534363, 'timestamp': '2025-09-30 22:20:47.061602', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.123675', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.12662671506404877, 'timestamp': '2025-09-30 22:20:47.130251', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:47.192639', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.10747657716274261, 'timestamp': '2025-09-30 22:20:47.195825', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.254693', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.16107630729675293, 'timestamp': '2025-09-30 22:20:47.257198', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.318786', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.16108466684818268, 'timestamp': '2025-09-30 22:20:47.321801', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:47.380184', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.199172705411911, 'timestamp': '2025-09-30 22:20:47.387543', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:47.445477', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.10903293639421463, 'timestamp': '2025-09-30 22:20:47.448833', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.506667', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.09019754827022552, 'timestamp': '2025-09-30 22:20:47.509628', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.568179', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.1514655500650406, 'timestamp': '2025-09-30 22:20:47.570905', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.628073', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.1978030800819397, 'timestamp': '2025-09-30 22:20:47.634398', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:47.690470', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.21133945882320404, 'timestamp': '2025-09-30 22:20:47.693441', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:47.752431', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.08968637883663177, 'timestamp': '2025-09-30 22:20:47.759012', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:47.824359', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.1445416510105133, 'timestamp': '2025-09-30 22:20:47.828121', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:47.890134', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.13412170112133026, 'timestamp': '2025-09-30 22:20:47.896737', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:47.952423', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.20326514542102814, 'timestamp': '2025-09-30 22:20:47.955128', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.022443', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.20088930428028107, 'timestamp': '2025-09-30 22:20:48.026369', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:48.095362', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.3295353949069977, 'timestamp': '2025-09-30 22:20:48.104891', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:48.162554', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.209719717502594, 'timestamp': '2025-09-30 22:20:48.169457', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.226141', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.10203015059232712, 'timestamp': '2025-09-30 22:20:48.228983', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.285392', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.19401980936527252, 'timestamp': '2025-09-30 22:20:48.294329', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:48.351295', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.1444096863269806, 'timestamp': '2025-09-30 22:20:48.358520', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:48.415288', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.23410440981388092, 'timestamp': '2025-09-30 22:20:48.422085', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.478514', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.1303950995206833, 'timestamp': '2025-09-30 22:20:48.481203', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.538742', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.11230768263339996, 'timestamp': '2025-09-30 22:20:48.541315', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.598375', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.25202271342277527, 'timestamp': '2025-09-30 22:20:48.601567', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.659562', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.22203810513019562, 'timestamp': '2025-09-30 22:20:48.672087', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:48.732742', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.13041891157627106, 'timestamp': '2025-09-30 22:20:48.736533', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:48.801140', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.11701086908578873, 'timestamp': '2025-09-30 22:20:48.809023', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:48.866261', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.15138910710811615, 'timestamp': '2025-09-30 22:20:48.869340', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:48.927165', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.12026359140872955, 'timestamp': '2025-09-30 22:20:48.936975', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:48.993961', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.13820548355579376, 'timestamp': '2025-09-30 22:20:48.997753', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.054850', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.1227792277932167, 'timestamp': '2025-09-30 22:20:49.057685', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.114455', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.1397455483675003, 'timestamp': '2025-09-30 22:20:49.117859', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:49.174039', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.1648588627576828, 'timestamp': '2025-09-30 22:20:49.180284', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.235806', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.1560646891593933, 'timestamp': '2025-09-30 22:20:49.239121', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.296711', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.13029243052005768, 'timestamp': '2025-09-30 22:20:49.299469', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.357854', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.20425540208816528, 'timestamp': '2025-09-30 22:20:49.360839', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:49.417779', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.19520577788352966, 'timestamp': '2025-09-30 22:20:49.424305', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.483251', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.18666666746139526, 'timestamp': '2025-09-30 22:20:49.485976', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.545704', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.1340976059436798, 'timestamp': '2025-09-30 22:20:49.550434', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.607940', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.16402582824230194, 'timestamp': '2025-09-30 22:20:49.612493', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:49.670586', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.18169011175632477, 'timestamp': '2025-09-30 22:20:49.676865', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.734136', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.13959622383117676, 'timestamp': '2025-09-30 22:20:49.737125', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.795260', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.13036128878593445, 'timestamp': '2025-09-30 22:20:49.798532', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.856340', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.12407776713371277, 'timestamp': '2025-09-30 22:20:49.859629', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:49.923636', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.20184443891048431, 'timestamp': '2025-09-30 22:20:49.930274', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:49.999069', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.19504068791866302, 'timestamp': '2025-09-30 22:20:50.007932', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:50.064420', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.1329951286315918, 'timestamp': '2025-09-30 22:20:50.069857', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:50.128314', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.15300056338310242, 'timestamp': '2025-09-30 22:20:50.133182', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:50.192317', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.21457655727863312, 'timestamp': '2025-09-30 22:20:50.198572', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:50.254748', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.10778474807739258, 'timestamp': '2025-09-30 22:20:50.258664', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:50.316432', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.20492398738861084, 'timestamp': '2025-09-30 22:20:50.320631', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.378976', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.1927269846200943, 'timestamp': '2025-09-30 22:20:50.383147', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.441105', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.10355490446090698, 'timestamp': '2025-09-30 22:20:50.453524', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:50.510462', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.27525603771209717, 'timestamp': '2025-09-30 22:20:50.520552', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.583123', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.1873105764389038, 'timestamp': '2025-09-30 22:20:50.586141', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:50.647970', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.09275519847869873, 'timestamp': '2025-09-30 22:20:50.652967', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:50.712740', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.18134111166000366, 'timestamp': '2025-09-30 22:20:50.718518', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.779179', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.12521837651729584, 'timestamp': '2025-09-30 22:20:50.781591', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.839087', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.16700339317321777, 'timestamp': '2025-09-30 22:20:50.842928', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:50.901423', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10723775625228882, 'timestamp': '2025-09-30 22:20:50.906330', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:50.963595', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.30103349685668945, 'timestamp': '2025-09-30 22:20:50.971175', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:51.028554', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.15653881430625916, 'timestamp': '2025-09-30 22:20:51.032905', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:51.089827', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11934444308280945, 'timestamp': '2025-09-30 22:20:51.093134', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:51.150887', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.18349875509738922, 'timestamp': '2025-09-30 22:20:51.155611', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:51.212271', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.13264553248882294, 'timestamp': '2025-09-30 22:20:51.220671', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:51.277152', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.16221538186073303, 'timestamp': '2025-09-30 22:20:51.280313', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:51.340557', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.2327384054660797, 'timestamp': '2025-09-30 22:20:51.347164', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:51.421925', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.2278410941362381, 'timestamp': '2025-09-30 22:20:51.424226', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:51.507299', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.13201336562633514, 'timestamp': '2025-09-30 22:20:51.513276', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:51.593947', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.13631203770637512, 'timestamp': '2025-09-30 22:20:51.597474', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:51.695359', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.26375555992126465, 'timestamp': '2025-09-30 22:20:51.701160', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:51.767364', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.200260728597641, 'timestamp': '2025-09-30 22:20:51.777498', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:51.856246', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.1133698895573616, 'timestamp': '2025-09-30 22:20:51.862549', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:51.942704', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.21393892168998718, 'timestamp': '2025-09-30 22:20:51.950405', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:52.033439', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.1510525792837143, 'timestamp': '2025-09-30 22:20:52.037402', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:52.111962', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.17604276537895203, 'timestamp': '2025-09-30 22:20:52.123175', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:52.190572', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.1620732545852661, 'timestamp': '2025-09-30 22:20:52.196801', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:52.296782', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.1510186493396759, 'timestamp': '2025-09-30 22:20:52.299507', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:52.377267', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.17562900483608246, 'timestamp': '2025-09-30 22:20:52.381860', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:52.459840', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.213484987616539, 'timestamp': '2025-09-30 22:20:52.465409', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:52.538310', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.17225413024425507, 'timestamp': '2025-09-30 22:20:52.547258', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:52.628348', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.24993768334388733, 'timestamp': '2025-09-30 22:20:52.631103', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:52.710010', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.12015988677740097, 'timestamp': '2025-09-30 22:20:52.717546', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:52.780449', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.1860799789428711, 'timestamp': '2025-09-30 22:20:52.784407', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:52.842351', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.2012111395597458, 'timestamp': '2025-09-30 22:20:52.850285', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:52.908184', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.16403049230575562, 'timestamp': '2025-09-30 22:20:52.912729', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:52.970703', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.3051966428756714, 'timestamp': '2025-09-30 22:20:52.974969', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:53.033151', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.18282762169837952, 'timestamp': '2025-09-30 22:20:53.036182', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.093519', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.117031030356884, 'timestamp': '2025-09-30 22:20:53.099935', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.157863', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.11360079795122147, 'timestamp': '2025-09-30 22:20:53.167642', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.224620', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.15344075858592987, 'timestamp': '2025-09-30 22:20:53.227961', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:53.292506', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.11707138270139694, 'timestamp': '2025-09-30 22:20:53.300218', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.364939', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.1597055196762085, 'timestamp': '2025-09-30 22:20:53.383632', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.446700', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.18379490077495575, 'timestamp': '2025-09-30 22:20:53.449571', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:53.507270', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.09147512912750244, 'timestamp': '2025-09-30 22:20:53.510065', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:53.568257', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.1686815470457077, 'timestamp': '2025-09-30 22:20:53.575806', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:53.633240', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.15165342390537262, 'timestamp': '2025-09-30 22:20:53.640179', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.700602', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.20354107022285461, 'timestamp': '2025-09-30 22:20:53.704469', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.767069', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.11839069426059723, 'timestamp': '2025-09-30 22:20:53.779767', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:53.838431', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.16369043290615082, 'timestamp': '2025-09-30 22:20:53.842219', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.900238', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.14462049305438995, 'timestamp': '2025-09-30 22:20:53.906783', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:53.964948', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.11695238947868347, 'timestamp': '2025-09-30 22:20:53.967751', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.029373', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.22702616453170776, 'timestamp': '2025-09-30 22:20:54.034556', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.092311', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.16383086144924164, 'timestamp': '2025-09-30 22:20:54.095802', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.153255', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.16187889873981476, 'timestamp': '2025-09-30 22:20:54.167649', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.224700', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.1725766658782959, 'timestamp': '2025-09-30 22:20:54.228919', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.286199', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.22729705274105072, 'timestamp': '2025-09-30 22:20:54.289299', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.347735', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.13728001713752747, 'timestamp': '2025-09-30 22:20:54.350355', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.407637', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.22537358105182648, 'timestamp': '2025-09-30 22:20:54.415321', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:54.475012', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.13814643025398254, 'timestamp': '2025-09-30 22:20:54.478273', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:54.536637', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.13242043554782867, 'timestamp': '2025-09-30 22:20:54.539996', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.597054', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.17329169809818268, 'timestamp': '2025-09-30 22:20:54.600786', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.669772', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.1869363784790039, 'timestamp': '2025-09-30 22:20:54.677746', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:54.734052', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.16781118512153625, 'timestamp': '2025-09-30 22:20:54.737360', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:54.794871', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.12846077978610992, 'timestamp': '2025-09-30 22:20:54.798212', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.855645', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.23731333017349243, 'timestamp': '2025-09-30 22:20:54.859157', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:54.917205', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.17579959332942963, 'timestamp': '2025-09-30 22:20:54.923241', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:54.979219', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.09516321122646332, 'timestamp': '2025-09-30 22:20:54.982416', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:55.040183', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.2015446573495865, 'timestamp': '2025-09-30 22:20:55.043275', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:55.101016', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.10514440387487411, 'timestamp': '2025-09-30 22:20:55.103946', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:55.161542', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.24500541388988495, 'timestamp': '2025-09-30 22:20:55.170962', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:55.238497', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.19947773218154907, 'timestamp': '2025-09-30 22:20:55.241444', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.305191', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.20479702949523926, 'timestamp': '2025-09-30 22:20:55.308386', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:55.366347', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.16107416152954102, 'timestamp': '2025-09-30 22:20:55.368910', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.440086', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.08551803231239319, 'timestamp': '2025-09-30 22:20:55.447706', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.504836', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.17634357511997223, 'timestamp': '2025-09-30 22:20:55.508428', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:55.566668', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.10374046117067337, 'timestamp': '2025-09-30 22:20:55.575107', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.634285', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.12637747824192047, 'timestamp': '2025-09-30 22:20:55.643530', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:55.703752', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.13017326593399048, 'timestamp': '2025-09-30 22:20:55.711481', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.773585', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.23664160072803497, 'timestamp': '2025-09-30 22:20:55.777667', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:55.835110', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.12989668548107147, 'timestamp': '2025-09-30 22:20:55.838324', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:55.895716', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.1576099544763565, 'timestamp': '2025-09-30 22:20:55.898731', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:55.960600', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.17170269787311554, 'timestamp': '2025-09-30 22:20:55.967505', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:56.025258', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.13823094964027405, 'timestamp': '2025-09-30 22:20:56.036408', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:56.099150', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.25169637799263, 'timestamp': '2025-09-30 22:20:56.102531', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:56.161462', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.12024229764938354, 'timestamp': '2025-09-30 22:20:56.164157', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:56.221546', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.12782564759254456, 'timestamp': '2025-09-30 22:20:56.227613', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:56.284381', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.21363143622875214, 'timestamp': '2025-09-30 22:20:56.287065', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:56.344406', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.18829694390296936, 'timestamp': '2025-09-30 22:20:56.351313', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:56.413984', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.194102481007576, 'timestamp': '2025-09-30 22:20:56.423493', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:56.481883', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.1661621630191803, 'timestamp': '2025-09-30 22:20:56.489548', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:56.554040', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.17594695091247559, 'timestamp': '2025-09-30 22:20:56.561969', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:56.625683', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.12970873713493347, 'timestamp': '2025-09-30 22:20:56.630538', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:56.694507', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.0846414789557457, 'timestamp': '2025-09-30 22:20:56.701268', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:56.761862', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.14745108783245087, 'timestamp': '2025-09-30 22:20:56.775564', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:56.838497', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.17709995806217194, 'timestamp': '2025-09-30 22:20:56.846894', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:56.911402', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.2613260746002197, 'timestamp': '2025-09-30 22:20:56.914815', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:56.972186', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.3463912904262543, 'timestamp': '2025-09-30 22:20:56.975535', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:20:57.032415', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.12138225138187408, 'timestamp': '2025-09-30 22:20:57.038817', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.095296', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.12849630415439606, 'timestamp': '2025-09-30 22:20:57.097984', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:20:57.154357', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.1523992121219635, 'timestamp': '2025-09-30 22:20:57.157637', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.215861', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.09511459618806839, 'timestamp': '2025-09-30 22:20:57.219000', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.277081', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.15156713128089905, 'timestamp': '2025-09-30 22:20:57.289706', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:57.361269', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.09904385358095169, 'timestamp': '2025-09-30 22:20:57.365566', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:57.423071', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.1503850519657135, 'timestamp': '2025-09-30 22:20:57.430357', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.487918', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.21907588839530945, 'timestamp': '2025-09-30 22:20:57.497927', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:57.557406', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.28768810629844666, 'timestamp': '2025-09-30 22:20:57.564237', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.621104', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.11541057378053665, 'timestamp': '2025-09-30 22:20:57.624993', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.683261', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.1371142715215683, 'timestamp': '2025-09-30 22:20:57.687431', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:57.747296', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.12449418753385544, 'timestamp': '2025-09-30 22:20:57.757040', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:57.814233', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.254550039768219, 'timestamp': '2025-09-30 22:20:57.821598', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.878016', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.11702282726764679, 'timestamp': '2025-09-30 22:20:57.881091', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:57.953031', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.15250006318092346, 'timestamp': '2025-09-30 22:20:57.955594', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.017198', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.1835671067237854, 'timestamp': '2025-09-30 22:20:58.021155', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:58.084106', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.15327881276607513, 'timestamp': '2025-09-30 22:20:58.091217', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:58.147912', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.1382031887769699, 'timestamp': '2025-09-30 22:20:58.150808', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.207686', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.08638589829206467, 'timestamp': '2025-09-30 22:20:58.210262', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:58.267263', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.2636690139770508, 'timestamp': '2025-09-30 22:20:58.273532', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:58.330454', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.1817309707403183, 'timestamp': '2025-09-30 22:20:58.339446', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.398584', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.16435641050338745, 'timestamp': '2025-09-30 22:20:58.404600', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:58.466099', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.23438991606235504, 'timestamp': '2025-09-30 22:20:58.474901', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:58.532845', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.15380676090717316, 'timestamp': '2025-09-30 22:20:58.543181', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.605182', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.17492863535881042, 'timestamp': '2025-09-30 22:20:58.611996', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:58.669232', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.07849802076816559, 'timestamp': '2025-09-30 22:20:58.673045', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.735505', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.13343873620033264, 'timestamp': '2025-09-30 22:20:58.741693', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:20:58.798958', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.23270221054553986, 'timestamp': '2025-09-30 22:20:58.803130', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:58.859721', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.14341461658477783, 'timestamp': '2025-09-30 22:20:58.870628', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:58.929315', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.15762639045715332, 'timestamp': '2025-09-30 22:20:58.933749', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:58.999403', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18653973937034607, 'timestamp': '2025-09-30 22:20:59.007041', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:59.070814', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.12759225070476532, 'timestamp': '2025-09-30 22:20:59.078176', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:59.134681', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.11862468719482422, 'timestamp': '2025-09-30 22:20:59.140596', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:59.200591', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.09043938666582108, 'timestamp': '2025-09-30 22:20:59.204071', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:59.266824', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.23038943111896515, 'timestamp': '2025-09-30 22:20:59.272723', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:59.337726', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.28851285576820374, 'timestamp': '2025-09-30 22:20:59.341617', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:59.406871', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.12966053187847137, 'timestamp': '2025-09-30 22:20:59.422744', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:59.498874', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.12952998280525208, 'timestamp': '2025-09-30 22:20:59.509881', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:59.575484', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.14328783750534058, 'timestamp': '2025-09-30 22:20:59.579064', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:20:59.639832', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.19802778959274292, 'timestamp': '2025-09-30 22:20:59.648547', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:59.706705', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.18619868159294128, 'timestamp': '2025-09-30 22:20:59.715863', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:59.779458', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.08514329046010971, 'timestamp': '2025-09-30 22:20:59.787762', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:20:59.851590', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.08537854999303818, 'timestamp': '2025-09-30 22:20:59.854591', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:20:59.914846', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.19495417177677155, 'timestamp': '2025-09-30 22:20:59.920190', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:20:59.977131', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.1464190036058426, 'timestamp': '2025-09-30 22:20:59.983233', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.039798', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.13354970514774323, 'timestamp': '2025-09-30 22:21:00.043647', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:00.100892', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.20337358117103577, 'timestamp': '2025-09-30 22:21:00.103418', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.166915', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.12982076406478882, 'timestamp': '2025-09-30 22:21:00.169288', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.229764', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.16647383570671082, 'timestamp': '2025-09-30 22:21:00.235703', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.291693', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.16912591457366943, 'timestamp': '2025-09-30 22:21:00.298664', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:00.356446', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.19583533704280853, 'timestamp': '2025-09-30 22:21:00.360282', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:00.418664', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.20960816740989685, 'timestamp': '2025-09-30 22:21:00.423129', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.480208', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.15074646472930908, 'timestamp': '2025-09-30 22:21:00.488606', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:00.546697', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.1476564258337021, 'timestamp': '2025-09-30 22:21:00.553738', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:00.611798', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.17482705414295197, 'timestamp': '2025-09-30 22:21:00.615205', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:00.690321', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.12696902453899384, 'timestamp': '2025-09-30 22:21:00.693412', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.750379', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.25416189432144165, 'timestamp': '2025-09-30 22:21:00.756890', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:00.814024', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.12418568879365921, 'timestamp': '2025-09-30 22:21:00.819636', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:00.881146', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.20562289655208588, 'timestamp': '2025-09-30 22:21:00.884645', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:00.947134', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.17548903822898865, 'timestamp': '2025-09-30 22:21:00.955974', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:01.021067', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.1334313154220581, 'timestamp': '2025-09-30 22:21:01.027161', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:01.091270', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.1889769285917282, 'timestamp': '2025-09-30 22:21:01.094120', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.151371', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.18575483560562134, 'timestamp': '2025-09-30 22:21:01.154415', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:01.213838', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.2676064372062683, 'timestamp': '2025-09-30 22:21:01.219426', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:01.278144', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.1413811892271042, 'timestamp': '2025-09-30 22:21:01.289402', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:01.348210', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.17433658242225647, 'timestamp': '2025-09-30 22:21:01.351767', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:01.414300', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.1710537225008011, 'timestamp': '2025-09-30 22:21:01.419169', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.480070', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.16029013693332672, 'timestamp': '2025-09-30 22:21:01.482735', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:01.539983', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.2540409564971924, 'timestamp': '2025-09-30 22:21:01.546697', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.609693', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.1943308413028717, 'timestamp': '2025-09-30 22:21:01.612648', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.671550', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.13729576766490936, 'timestamp': '2025-09-30 22:21:01.674922', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:01.731768', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.23305220901966095, 'timestamp': '2025-09-30 22:21:01.736254', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.798883', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.23685328662395477, 'timestamp': '2025-09-30 22:21:01.805139', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:01.870751', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.11654961854219437, 'timestamp': '2025-09-30 22:21:01.874075', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:01.931977', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.3410208225250244, 'timestamp': '2025-09-30 22:21:01.935632', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:01.992266', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.1878976672887802, 'timestamp': '2025-09-30 22:21:01.995068', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:02.053868', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.118501216173172, 'timestamp': '2025-09-30 22:21:02.062445', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:02.120605', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.160598024725914, 'timestamp': '2025-09-30 22:21:02.123285', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:02.180121', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.17373202741146088, 'timestamp': '2025-09-30 22:21:02.190209', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:02.247900', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.16289271414279938, 'timestamp': '2025-09-30 22:21:02.251458', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.309128', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.22503814101219177, 'timestamp': '2025-09-30 22:21:02.316261', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.382876', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.20745056867599487, 'timestamp': '2025-09-30 22:21:02.389235', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.455856', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.1549425572156906, 'timestamp': '2025-09-30 22:21:02.458260', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:02.526304', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.12433423846960068, 'timestamp': '2025-09-30 22:21:02.532046', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:02.593995', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.15053747594356537, 'timestamp': '2025-09-30 22:21:02.600834', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:02.673193', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.12427789717912674, 'timestamp': '2025-09-30 22:21:02.678536', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.742199', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.15167061984539032, 'timestamp': '2025-09-30 22:21:02.745167', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.818079', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.19208137691020966, 'timestamp': '2025-09-30 22:21:02.823980', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:21:02.885337', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.09363953769207001, 'timestamp': '2025-09-30 22:21:02.894560', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:02.956056', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.1165311262011528, 'timestamp': '2025-09-30 22:21:02.958573', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:03.031146', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.1468370109796524, 'timestamp': '2025-09-30 22:21:03.034310', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.112976', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.15404599905014038, 'timestamp': '2025-09-30 22:21:03.115650', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.191432', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.08184672892093658, 'timestamp': '2025-09-30 22:21:03.197521', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:03.269751', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.13306567072868347, 'timestamp': '2025-09-30 22:21:03.275238', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:03.341607', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.1382109671831131, 'timestamp': '2025-09-30 22:21:03.350622', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.419209', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.25679758191108704, 'timestamp': '2025-09-30 22:21:03.426533', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:03.503580', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.18651758134365082, 'timestamp': '2025-09-30 22:21:03.509960', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.571728', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.1978686898946762, 'timestamp': '2025-09-30 22:21:03.577407', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:03.647249', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.30690810084342957, 'timestamp': '2025-09-30 22:21:03.650379', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.735798', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.3060799837112427, 'timestamp': '2025-09-30 22:21:03.738281', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:03.795718', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.14146706461906433, 'timestamp': '2025-09-30 22:21:03.810442', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:03.867223', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.17076261341571808, 'timestamp': '2025-09-30 22:21:03.869641', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:03.930234', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.2299429029226303, 'timestamp': '2025-09-30 22:21:03.932946', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:03.991403', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.12949691712856293, 'timestamp': '2025-09-30 22:21:04.010434', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.085824', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.12701216340065002, 'timestamp': '2025-09-30 22:21:04.101083', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:04.164070', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.1672930270433426, 'timestamp': '2025-09-30 22:21:04.171469', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:04.229565', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.1269827038049698, 'timestamp': '2025-09-30 22:21:04.241377', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:04.306129', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.09875903278589249, 'timestamp': '2025-09-30 22:21:04.309018', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.380107', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.15577904880046844, 'timestamp': '2025-09-30 22:21:04.387701', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:04.456703', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.13525573909282684, 'timestamp': '2025-09-30 22:21:04.467580', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.526728', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.13339680433273315, 'timestamp': '2025-09-30 22:21:04.531244', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.590727', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.15395194292068481, 'timestamp': '2025-09-30 22:21:04.594009', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.653776', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.12798354029655457, 'timestamp': '2025-09-30 22:21:04.660856', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:04.725162', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.11650082468986511, 'timestamp': '2025-09-30 22:21:04.729032', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:04.788758', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.14184854924678802, 'timestamp': '2025-09-30 22:21:04.798004', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.861947', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.12188369035720825, 'timestamp': '2025-09-30 22:21:04.864598', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:04.927808', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.1949416697025299, 'timestamp': '2025-09-30 22:21:04.938936', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:04.996977', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.13445791602134705, 'timestamp': '2025-09-30 22:21:05.003498', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:05.062865', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.1652081459760666, 'timestamp': '2025-09-30 22:21:05.068030', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:05.130741', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.2097751945257187, 'timestamp': '2025-09-30 22:21:05.138227', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:05.201191', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.14810810983181, 'timestamp': '2025-09-30 22:21:05.207756', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:05.268104', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.13362933695316315, 'timestamp': '2025-09-30 22:21:05.270878', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:05.332619', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.225768581032753, 'timestamp': '2025-09-30 22:21:05.335303', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:05.393069', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.23021522164344788, 'timestamp': '2025-09-30 22:21:05.396016', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:05.463624', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.1472669392824173, 'timestamp': '2025-09-30 22:21:05.476302', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:05.533151', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.23096607625484467, 'timestamp': '2025-09-30 22:21:05.540853', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:05.598706', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.10781431943178177, 'timestamp': '2025-09-30 22:21:05.606169', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:05.671322', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.12913399934768677, 'timestamp': '2025-09-30 22:21:05.677996', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:05.734803', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.12958446145057678, 'timestamp': '2025-09-30 22:21:05.742063', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:05.801511', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.1656564176082611, 'timestamp': '2025-09-30 22:21:05.805814', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:05.868283', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.17886582016944885, 'timestamp': '2025-09-30 22:21:05.871328', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:05.929520', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.17536455392837524, 'timestamp': '2025-09-30 22:21:05.932746', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:05.991469', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.15182159841060638, 'timestamp': '2025-09-30 22:21:05.999370', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.060559', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.14472071826457977, 'timestamp': '2025-09-30 22:21:06.064676', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:06.121712', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.1352689117193222, 'timestamp': '2025-09-30 22:21:06.125402', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:06.182593', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.20870813727378845, 'timestamp': '2025-09-30 22:21:06.186058', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.249511', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.15420664846897125, 'timestamp': '2025-09-30 22:21:06.263013', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:06.320873', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.1479908674955368, 'timestamp': '2025-09-30 22:21:06.339536', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.396622', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.1418473869562149, 'timestamp': '2025-09-30 22:21:06.399378', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:06.462196', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.13584360480308533, 'timestamp': '2025-09-30 22:21:06.465382', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.522363', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.2621622383594513, 'timestamp': '2025-09-30 22:21:06.528746', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:06.585038', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.11149738729000092, 'timestamp': '2025-09-30 22:21:06.587734', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.645870', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.12371456623077393, 'timestamp': '2025-09-30 22:21:06.648436', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.706343', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.11088209599256516, 'timestamp': '2025-09-30 22:21:06.709133', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:06.767117', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.24093715846538544, 'timestamp': '2025-09-30 22:21:06.774296', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:06.830592', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.19206522405147552, 'timestamp': '2025-09-30 22:21:06.833494', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:06.895817', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.13319243490695953, 'timestamp': '2025-09-30 22:21:06.899971', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:06.970374', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.10649430006742477, 'timestamp': '2025-09-30 22:21:06.974411', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:07.031690', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.13681983947753906, 'timestamp': '2025-09-30 22:21:07.045037', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:07.100823', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.1580129861831665, 'timestamp': '2025-09-30 22:21:07.103442', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:07.168389', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.1724255532026291, 'timestamp': '2025-09-30 22:21:07.171073', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:07.230065', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.15144021809101105, 'timestamp': '2025-09-30 22:21:07.233104', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:07.291328', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.0924973413348198, 'timestamp': '2025-09-30 22:21:07.300924', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:07.365236', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.15578919649124146, 'timestamp': '2025-09-30 22:21:07.376571', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:07.436207', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.10698920488357544, 'timestamp': '2025-09-30 22:21:07.439912', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:07.510584', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.1706046611070633, 'timestamp': '2025-09-30 22:21:07.515066', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:07.572868', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.22029046714305878, 'timestamp': '2025-09-30 22:21:07.579408', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:07.635775', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.20263533294200897, 'timestamp': '2025-09-30 22:21:07.639050', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:07.696453', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.12777896225452423, 'timestamp': '2025-09-30 22:21:07.700732', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:07.758502', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.16604776680469513, 'timestamp': '2025-09-30 22:21:07.768523', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:07.827378', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.20338934659957886, 'timestamp': '2025-09-30 22:21:07.833932', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:07.924618', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.09561073780059814, 'timestamp': '2025-09-30 22:21:07.935757', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:08.017842', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.1560492217540741, 'timestamp': '2025-09-30 22:21:08.024501', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:08.101819', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.15041397511959076, 'timestamp': '2025-09-30 22:21:08.104655', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:08.196402', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.10958775877952576, 'timestamp': '2025-09-30 22:21:08.203508', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:08.279524', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.18012745678424835, 'timestamp': '2025-09-30 22:21:08.288142', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:08.388748', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.229732483625412, 'timestamp': '2025-09-30 22:21:08.395143', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:08.477607', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.22584502398967743, 'timestamp': '2025-09-30 22:21:08.481798', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:08.562520', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.20709365606307983, 'timestamp': '2025-09-30 22:21:08.569955', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:08.651843', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.16647781431674957, 'timestamp': '2025-09-30 22:21:08.659235', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:08.741259', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.16154688596725464, 'timestamp': '2025-09-30 22:21:08.748865', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:08.840805', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.11202400177717209, 'timestamp': '2025-09-30 22:21:08.851093', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:08.931645', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.23267020285129547, 'timestamp': '2025-09-30 22:21:08.938148', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.035305', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.14491641521453857, 'timestamp': '2025-09-30 22:21:09.039041', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:09.156002', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.17153599858283997, 'timestamp': '2025-09-30 22:21:09.158752', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:09.224832', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.18314875662326813, 'timestamp': '2025-09-30 22:21:09.229558', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:09.288337', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.23114070296287537, 'timestamp': '2025-09-30 22:21:09.296139', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:09.359259', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.1995689570903778, 'timestamp': '2025-09-30 22:21:09.361997', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.419966', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.1836661994457245, 'timestamp': '2025-09-30 22:21:09.423056', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:09.481284', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.1747565120458603, 'timestamp': '2025-09-30 22:21:09.484027', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.542569', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.11444906890392303, 'timestamp': '2025-09-30 22:21:09.552009', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.624198', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.20757214725017548, 'timestamp': '2025-09-30 22:21:09.629589', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.688502', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.18250630795955658, 'timestamp': '2025-09-30 22:21:09.691371', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.757435', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.20350104570388794, 'timestamp': '2025-09-30 22:21:09.761772', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:09.826683', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.14190290868282318, 'timestamp': '2025-09-30 22:21:09.833264', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:09.891835', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.21280084550380707, 'timestamp': '2025-09-30 22:21:09.894431', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:09.953230', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.07919421046972275, 'timestamp': '2025-09-30 22:21:09.955632', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:10.012021', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.20820893347263336, 'timestamp': '2025-09-30 22:21:10.014589', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:10.074247', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.1241689994931221, 'timestamp': '2025-09-30 22:21:10.080643', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.148093', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.11034910380840302, 'timestamp': '2025-09-30 22:21:10.151025', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.207832', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.1347830444574356, 'timestamp': '2025-09-30 22:21:10.213328', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.270652', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.08399399369955063, 'timestamp': '2025-09-30 22:21:10.273251', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:10.330371', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.11950813233852386, 'timestamp': '2025-09-30 22:21:10.336873', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:21:10.394610', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.14150886237621307, 'timestamp': '2025-09-30 22:21:10.397348', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:10.458165', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.20970198512077332, 'timestamp': '2025-09-30 22:21:10.462665', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:10.520628', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.14727245271205902, 'timestamp': '2025-09-30 22:21:10.523843', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.581108', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.14095616340637207, 'timestamp': '2025-09-30 22:21:10.587320', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:10.643683', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.10877332836389542, 'timestamp': '2025-09-30 22:21:10.646752', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.703620', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.08908778429031372, 'timestamp': '2025-09-30 22:21:10.706541', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.763922', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.17734143137931824, 'timestamp': '2025-09-30 22:21:10.766685', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.824059', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.15769487619400024, 'timestamp': '2025-09-30 22:21:10.833672', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.895448', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.1614971160888672, 'timestamp': '2025-09-30 22:21:10.898265', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:10.954783', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.2714298367500305, 'timestamp': '2025-09-30 22:21:10.962734', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.043412', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.13646171987056732, 'timestamp': '2025-09-30 22:21:11.046643', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:11.103056', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.10647150874137878, 'timestamp': '2025-09-30 22:21:11.108881', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.164346', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.07039932161569595, 'timestamp': '2025-09-30 22:21:11.167262', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.223417', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.09470626711845398, 'timestamp': '2025-09-30 22:21:11.226265', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.283160', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.15196748077869415, 'timestamp': '2025-09-30 22:21:11.287113', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:11.352395', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.23096251487731934, 'timestamp': '2025-09-30 22:21:11.360651', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:11.426042', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.11028705537319183, 'timestamp': '2025-09-30 22:21:11.429531', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:11.488092', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.1727752536535263, 'timestamp': '2025-09-30 22:21:11.493706', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:11.553614', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.14939066767692566, 'timestamp': '2025-09-30 22:21:11.558905', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:11.626150', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.19118866324424744, 'timestamp': '2025-09-30 22:21:11.633214', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.689721', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.15146198868751526, 'timestamp': '2025-09-30 22:21:11.692415', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:11.748642', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.11336126923561096, 'timestamp': '2025-09-30 22:21:11.753106', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.810204', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.20429502427577972, 'timestamp': '2025-09-30 22:21:11.814436', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:11.872101', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.16188649833202362, 'timestamp': '2025-09-30 22:21:11.878757', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:11.944884', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.18356366455554962, 'timestamp': '2025-09-30 22:21:11.948654', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.008324', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.12189210206270218, 'timestamp': '2025-09-30 22:21:12.011886', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.068468', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.14639237523078918, 'timestamp': '2025-09-30 22:21:12.071355', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.129466', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.19113148748874664, 'timestamp': '2025-09-30 22:21:12.137701', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:12.199257', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.17941820621490479, 'timestamp': '2025-09-30 22:21:12.202285', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:12.258973', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.14899660646915436, 'timestamp': '2025-09-30 22:21:12.261180', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:12.325441', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.10714882612228394, 'timestamp': '2025-09-30 22:21:12.328661', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.395127', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.16407959163188934, 'timestamp': '2025-09-30 22:21:12.402120', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:12.468329', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.17104953527450562, 'timestamp': '2025-09-30 22:21:12.471923', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.529573', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.22525566816329956, 'timestamp': '2025-09-30 22:21:12.532497', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:12.589762', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.23791645467281342, 'timestamp': '2025-09-30 22:21:12.592160', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:12.654481', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.24528302252292633, 'timestamp': '2025-09-30 22:21:12.661875', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.720968', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.1778143048286438, 'timestamp': '2025-09-30 22:21:12.725936', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:12.782987', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.10709739476442337, 'timestamp': '2025-09-30 22:21:12.785906', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.843730', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.17243404686450958, 'timestamp': '2025-09-30 22:21:12.850195', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:12.910677', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.14130842685699463, 'timestamp': '2025-09-30 22:21:12.917030', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:12.974242', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.12986722588539124, 'timestamp': '2025-09-30 22:21:12.980162', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.038156', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.13372166454792023, 'timestamp': '2025-09-30 22:21:13.043654', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:13.102455', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.1377331018447876, 'timestamp': '2025-09-30 22:21:13.105131', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:13.162368', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.15605080127716064, 'timestamp': '2025-09-30 22:21:13.182273', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:13.238647', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.15201354026794434, 'timestamp': '2025-09-30 22:21:13.242015', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:13.304081', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.08165568858385086, 'timestamp': '2025-09-30 22:21:13.308112', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.367615', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.1821291744709015, 'timestamp': '2025-09-30 22:21:13.370429', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:13.429585', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.11455980688333511, 'timestamp': '2025-09-30 22:21:13.437352', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.494778', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.17643184959888458, 'timestamp': '2025-09-30 22:21:13.498235', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:13.558659', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.10750312358140945, 'timestamp': '2025-09-30 22:21:13.561472', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:13.618663', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.21660426259040833, 'timestamp': '2025-09-30 22:21:13.621613', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:13.687343', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.1783931851387024, 'timestamp': '2025-09-30 22:21:13.703363', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.762074', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.20023342967033386, 'timestamp': '2025-09-30 22:21:13.764610', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.821804', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.11845989525318146, 'timestamp': '2025-09-30 22:21:13.824754', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:13.882167', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.1375507414340973, 'timestamp': '2025-09-30 22:21:13.885406', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:13.942601', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.25849786400794983, 'timestamp': '2025-09-30 22:21:13.950422', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.010626', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.16520266234874725, 'timestamp': '2025-09-30 22:21:14.013691', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.073084', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.09684639424085617, 'timestamp': '2025-09-30 22:21:14.078636', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:14.136137', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.21640115976333618, 'timestamp': '2025-09-30 22:21:14.140657', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.199692', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.09628840535879135, 'timestamp': '2025-09-30 22:21:14.205937', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.262232', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.10990046709775925, 'timestamp': '2025-09-30 22:21:14.270418', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:14.334807', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.11211031675338745, 'timestamp': '2025-09-30 22:21:14.338114', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:14.395878', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.24014005064964294, 'timestamp': '2025-09-30 22:21:14.399765', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:14.456848', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.11411559581756592, 'timestamp': '2025-09-30 22:21:14.464296', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.524594', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.15033286809921265, 'timestamp': '2025-09-30 22:21:14.531657', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.592766', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.19677816331386566, 'timestamp': '2025-09-30 22:21:14.596922', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.653473', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.20955602824687958, 'timestamp': '2025-09-30 22:21:14.656759', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.715906', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.13646212220191956, 'timestamp': '2025-09-30 22:21:14.725650', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.785895', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.1674148440361023, 'timestamp': '2025-09-30 22:21:14.793088', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:14.861988', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.1359584480524063, 'timestamp': '2025-09-30 22:21:14.866333', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:14.923395', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.16684086620807648, 'timestamp': '2025-09-30 22:21:14.926469', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:14.987081', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.12461909651756287, 'timestamp': '2025-09-30 22:21:14.993946', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.052091', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.19099193811416626, 'timestamp': '2025-09-30 22:21:15.057051', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:15.117409', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.1318633258342743, 'timestamp': '2025-09-30 22:21:15.121279', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:15.179618', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.16461850702762604, 'timestamp': '2025-09-30 22:21:15.184373', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:15.244023', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.12467751652002335, 'timestamp': '2025-09-30 22:21:15.250616', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:15.308067', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.154169499874115, 'timestamp': '2025-09-30 22:21:15.310989', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.376743', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.15879233181476593, 'timestamp': '2025-09-30 22:21:15.379354', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.436607', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.12450262159109116, 'timestamp': '2025-09-30 22:21:15.447918', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:15.504779', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.15560956299304962, 'timestamp': '2025-09-30 22:21:15.511528', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:15.567744', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.20708546042442322, 'timestamp': '2025-09-30 22:21:15.571329', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.629212', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.15651853382587433, 'timestamp': '2025-09-30 22:21:15.641364', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:15.699243', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.15322795510292053, 'timestamp': '2025-09-30 22:21:15.703403', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:15.761597', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.13983896374702454, 'timestamp': '2025-09-30 22:21:15.768510', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.825832', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.15487127006053925, 'timestamp': '2025-09-30 22:21:15.828873', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:15.885153', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.14992071688175201, 'timestamp': '2025-09-30 22:21:15.888826', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:15.958706', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.19254618883132935, 'timestamp': '2025-09-30 22:21:15.964694', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:16.021945', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.26218053698539734, 'timestamp': '2025-09-30 22:21:16.035193', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-30 22:21:16.507712', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:16.569735', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.2274363785982132, 'timestamp': '2025-09-30 22:21:16.576970', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:16.635483', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.18893665075302124, 'timestamp': '2025-09-30 22:21:16.638226', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:16.700594', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.15010298788547516, 'timestamp': '2025-09-30 22:21:16.703398', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:16.763056', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.212371364235878, 'timestamp': '2025-09-30 22:21:16.772144', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:16.828655', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.14394544064998627, 'timestamp': '2025-09-30 22:21:16.836981', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:16.899341', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.09575391560792923, 'timestamp': '2025-09-30 22:21:16.902633', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:16.970953', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.17001871764659882, 'timestamp': '2025-09-30 22:21:16.977254', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:17.045250', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.15761658549308777, 'timestamp': '2025-09-30 22:21:17.051873', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:17.111814', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.17803557217121124, 'timestamp': '2025-09-30 22:21:17.117524', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.176270', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.11386026442050934, 'timestamp': '2025-09-30 22:21:17.180911', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.240182', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.14012683928012848, 'timestamp': '2025-09-30 22:21:17.248779', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.314738', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.19578079879283905, 'timestamp': '2025-09-30 22:21:17.328886', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:17.385496', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.11991243809461594, 'timestamp': '2025-09-30 22:21:17.388265', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.446958', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.1883639693260193, 'timestamp': '2025-09-30 22:21:17.449783', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:17.517451', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.1457887440919876, 'timestamp': '2025-09-30 22:21:17.530845', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.590107', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.1564963012933731, 'timestamp': '2025-09-30 22:21:17.596828', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.653798', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.10963279008865356, 'timestamp': '2025-09-30 22:21:17.667563', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:17.729174', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.14868243038654327, 'timestamp': '2025-09-30 22:21:17.732311', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:17.790771', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.2328912317752838, 'timestamp': '2025-09-30 22:21:17.794031', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:17.851560', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.11691274493932724, 'timestamp': '2025-09-30 22:21:17.859015', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:17.932060', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.12974025309085846, 'timestamp': '2025-09-30 22:21:17.936171', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:17.994775', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.1823713332414627, 'timestamp': '2025-09-30 22:21:17.998077', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:18.064781', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.11127741634845734, 'timestamp': '2025-09-30 22:21:18.068849', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:18.125901', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.17648692429065704, 'timestamp': '2025-09-30 22:21:18.133420', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:21:32.656261', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 12628.140740178082, 'timestamp': '2025-09-30 22:21:32.670064', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:32.730687', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.13344712555408478, 'timestamp': '2025-09-30 22:21:32.737662', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:32.798850', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.10683760792016983, 'timestamp': '2025-09-30 22:21:32.809027', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:32.883444', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.18758776783943176, 'timestamp': '2025-09-30 22:21:32.896635', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:32.979955', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.1108219102025032, 'timestamp': '2025-09-30 22:21:32.994955', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:33.072831', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.1605924665927887, 'timestamp': '2025-09-30 22:21:33.080601', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.171107', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.2931860685348511, 'timestamp': '2025-09-30 22:21:33.179769', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:33.247877', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.17001740634441376, 'timestamp': '2025-09-30 22:21:33.261401', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:33.334096', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.1271188110113144, 'timestamp': '2025-09-30 22:21:33.342429', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:33.408590', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.11115491390228271, 'timestamp': '2025-09-30 22:21:33.411565', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.476822', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.13837601244449615, 'timestamp': '2025-09-30 22:21:33.479622', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:33.536914', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.09690026938915253, 'timestamp': '2025-09-30 22:21:33.541863', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.607069', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.1603442132472992, 'timestamp': '2025-09-30 22:21:33.614217', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:33.676368', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.08781518042087555, 'timestamp': '2025-09-30 22:21:33.688742', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.765738', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.1770341843366623, 'timestamp': '2025-09-30 22:21:33.768563', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.825902', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.1683434098958969, 'timestamp': '2025-09-30 22:21:33.829390', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.888583', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.1587418019771576, 'timestamp': '2025-09-30 22:21:33.896113', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:33.983110', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.16036416590213776, 'timestamp': '2025-09-30 22:21:33.986324', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:34.049557', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.13576069474220276, 'timestamp': '2025-09-30 22:21:34.053034', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:34.118960', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.1807747185230255, 'timestamp': '2025-09-30 22:21:34.121646', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:34.182948', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.2118474543094635, 'timestamp': '2025-09-30 22:21:34.193288', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:34.253579', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.10046939551830292, 'timestamp': '2025-09-30 22:21:34.256228', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:34.352114', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.13156770169734955, 'timestamp': '2025-09-30 22:21:34.359139', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:34.426115', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.06300908327102661, 'timestamp': '2025-09-30 22:21:34.428757', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:34.486577', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.1074790507555008, 'timestamp': '2025-09-30 22:21:34.493376', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:34.553754', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.1766085922718048, 'timestamp': '2025-09-30 22:21:34.557261', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:34.623443', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.2026951164007187, 'timestamp': '2025-09-30 22:21:34.626006', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:34.692312', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.1843358278274536, 'timestamp': '2025-09-30 22:21:34.694855', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:34.754785', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.1743675321340561, 'timestamp': '2025-09-30 22:21:34.761325', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:34.819219', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.201778843998909, 'timestamp': '2025-09-30 22:21:34.823119', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:34.882555', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.05037583410739899, 'timestamp': '2025-09-30 22:21:34.886130', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:34.947719', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.15505556762218475, 'timestamp': '2025-09-30 22:21:34.955738', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:35.013396', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.21106889843940735, 'timestamp': '2025-09-30 22:21:35.019878', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:35.079104', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.11099892854690552, 'timestamp': '2025-09-30 22:21:35.081801', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:21:35.138829', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.1335260421037674, 'timestamp': '2025-09-30 22:21:35.141083', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:35.197580', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.23148295283317566, 'timestamp': '2025-09-30 22:21:35.199910', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.256635', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.16162170469760895, 'timestamp': '2025-09-30 22:21:35.264755', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.322665', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.1506936252117157, 'timestamp': '2025-09-30 22:21:35.325881', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.382712', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.15231429040431976, 'timestamp': '2025-09-30 22:21:35.385280', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.445074', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.22397258877754211, 'timestamp': '2025-09-30 22:21:35.449450', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:35.509045', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.21933460235595703, 'timestamp': '2025-09-30 22:21:35.518780', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:35.576271', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.12369176000356674, 'timestamp': '2025-09-30 22:21:35.579226', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.636134', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.1475595235824585, 'timestamp': '2025-09-30 22:21:35.639531', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.696388', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.14251349866390228, 'timestamp': '2025-09-30 22:21:35.701801', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.760317', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.13729967176914215, 'timestamp': '2025-09-30 22:21:35.768678', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:35.825784', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.1420341581106186, 'timestamp': '2025-09-30 22:21:35.833109', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:35.890106', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.2787310481071472, 'timestamp': '2025-09-30 22:21:35.898112', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:35.955307', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.1397981196641922, 'timestamp': '2025-09-30 22:21:35.957965', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:36.032353', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.16789159178733826, 'timestamp': '2025-09-30 22:21:36.043023', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:36.113701', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.0977037101984024, 'timestamp': '2025-09-30 22:21:36.116354', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:36.177529', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.10802866518497467, 'timestamp': '2025-09-30 22:21:36.189611', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:36.247527', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.19570297002792358, 'timestamp': '2025-09-30 22:21:36.250178', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:36.312693', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.12672321498394012, 'timestamp': '2025-09-30 22:21:36.319890', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:36.380124', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.11682930588722229, 'timestamp': '2025-09-30 22:21:36.387192', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:36.447740', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.15726491808891296, 'timestamp': '2025-09-30 22:21:36.450189', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:36.510358', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.13897153735160828, 'timestamp': '2025-09-30 22:21:36.513213', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:36.573915', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.2114422619342804, 'timestamp': '2025-09-30 22:21:36.580335', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:36.647948', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.1922738403081894, 'timestamp': '2025-09-30 22:21:36.651093', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:36.707676', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.13551123440265656, 'timestamp': '2025-09-30 22:21:36.722710', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:36.785508', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.11928394436836243, 'timestamp': '2025-09-30 22:21:36.790004', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:36.846532', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.22157326340675354, 'timestamp': '2025-09-30 22:21:36.853973', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:36.917411', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.24778328835964203, 'timestamp': '2025-09-30 22:21:36.930990', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:36.992122', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.10390861332416534, 'timestamp': '2025-09-30 22:21:36.995157', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:37.060675', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.14195333421230316, 'timestamp': '2025-09-30 22:21:37.066852', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:37.142392', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.17568793892860413, 'timestamp': '2025-09-30 22:21:37.152509', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:37.211097', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.15184560418128967, 'timestamp': '2025-09-30 22:21:37.216159', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:37.287587', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.12897330522537231, 'timestamp': '2025-09-30 22:21:37.290761', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:37.348127', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.10769284516572952, 'timestamp': '2025-09-30 22:21:37.354500', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:37.418413', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.25059762597084045, 'timestamp': '2025-09-30 22:21:37.426628', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:37.483797', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.10676063597202301, 'timestamp': '2025-09-30 22:21:37.489309', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:37.548902', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.17996224761009216, 'timestamp': '2025-09-30 22:21:37.551286', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:37.608726', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.13442674279212952, 'timestamp': '2025-09-30 22:21:37.611236', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:37.671828', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.13342085480690002, 'timestamp': '2025-09-30 22:21:37.678461', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:37.735048', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.21310466527938843, 'timestamp': '2025-09-30 22:21:37.741107', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:37.801049', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.07695517688989639, 'timestamp': '2025-09-30 22:21:37.803824', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:37.863382', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.09947334975004196, 'timestamp': '2025-09-30 22:21:37.866282', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:37.922379', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09546372294425964, 'timestamp': '2025-09-30 22:21:37.928863', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:37.984931', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.11108376830816269, 'timestamp': '2025-09-30 22:21:37.988342', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.046602', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.1400846391916275, 'timestamp': '2025-09-30 22:21:38.048999', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:38.105063', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.20182903110980988, 'timestamp': '2025-09-30 22:21:38.107751', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.164425', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.28658798336982727, 'timestamp': '2025-09-30 22:21:38.170622', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:38.226254', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.07941395789384842, 'timestamp': '2025-09-30 22:21:38.229640', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.286655', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.13850609958171844, 'timestamp': '2025-09-30 22:21:38.289619', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:38.349014', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.11703819781541824, 'timestamp': '2025-09-30 22:21:38.351577', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.409435', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.15227010846138, 'timestamp': '2025-09-30 22:21:38.415874', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:38.471817', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.16239623725414276, 'timestamp': '2025-09-30 22:21:38.474724', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:38.532654', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.2567352056503296, 'timestamp': '2025-09-30 22:21:38.539093', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:38.596181', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.15333367884159088, 'timestamp': '2025-09-30 22:21:38.598892', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.664815', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.16328907012939453, 'timestamp': '2025-09-30 22:21:38.671027', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.727380', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.18665729463100433, 'timestamp': '2025-09-30 22:21:38.730678', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.799034', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.172119602560997, 'timestamp': '2025-09-30 22:21:38.803163', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.861890', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.09777680784463882, 'timestamp': '2025-09-30 22:21:38.864668', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:38.922149', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.22814610600471497, 'timestamp': '2025-09-30 22:21:38.928235', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:38.986931', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.1588120460510254, 'timestamp': '2025-09-30 22:21:38.989639', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:39.047908', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.10211259871721268, 'timestamp': '2025-09-30 22:21:39.062009', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.129863', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.13416773080825806, 'timestamp': '2025-09-30 22:21:39.132733', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.189731', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.1256304532289505, 'timestamp': '2025-09-30 22:21:39.196287', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:39.253743', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.19949254393577576, 'timestamp': '2025-09-30 22:21:39.261466', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:39.318725', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.132716566324234, 'timestamp': '2025-09-30 22:21:39.321898', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.391540', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.2128894180059433, 'timestamp': '2025-09-30 22:21:39.398587', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.456287', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.1267910748720169, 'timestamp': '2025-09-30 22:21:39.464438', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:39.522781', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.18737472593784332, 'timestamp': '2025-09-30 22:21:39.525657', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.585485', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.16306649148464203, 'timestamp': '2025-09-30 22:21:39.592068', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:39.656871', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.09008460491895676, 'timestamp': '2025-09-30 22:21:39.659943', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:39.721613', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.19852948188781738, 'timestamp': '2025-09-30 22:21:39.732413', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.802407', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.2232566475868225, 'timestamp': '2025-09-30 22:21:39.805319', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.865717', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.13108308613300323, 'timestamp': '2025-09-30 22:21:39.872324', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:39.937373', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.24614626169204712, 'timestamp': '2025-09-30 22:21:39.939781', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:39.997777', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.10577388107776642, 'timestamp': '2025-09-30 22:21:40.004787', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:40.067953', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.1386987864971161, 'timestamp': '2025-09-30 22:21:40.071335', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:40.128743', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.1611398160457611, 'timestamp': '2025-09-30 22:21:40.132309', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:40.190064', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.2421722412109375, 'timestamp': '2025-09-30 22:21:40.193197', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:40.258227', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.17265203595161438, 'timestamp': '2025-09-30 22:21:40.265723', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:40.333097', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.06558098644018173, 'timestamp': '2025-09-30 22:21:40.336423', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:40.393042', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.16361767053604126, 'timestamp': '2025-09-30 22:21:40.396385', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:40.471780', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.1612517535686493, 'timestamp': '2025-09-30 22:21:40.483097', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:40.568954', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.13722074031829834, 'timestamp': '2025-09-30 22:21:40.575393', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:40.659574', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.24651311337947845, 'timestamp': '2025-09-30 22:21:40.662468', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:40.719599', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.14353203773498535, 'timestamp': '2025-09-30 22:21:40.723125', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:21:40.781056', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.2467440366744995, 'timestamp': '2025-09-30 22:21:40.783863', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:40.853870', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.1844598352909088, 'timestamp': '2025-09-30 22:21:40.859981', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:40.919435', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.09681063145399094, 'timestamp': '2025-09-30 22:21:40.924928', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:40.984710', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.14627355337142944, 'timestamp': '2025-09-30 22:21:40.988922', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:41.052331', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.08283522725105286, 'timestamp': '2025-09-30 22:21:41.062274', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:41.134544', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.09334232658147812, 'timestamp': '2025-09-30 22:21:41.141302', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:41.197641', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.24442411959171295, 'timestamp': '2025-09-30 22:21:41.202409', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:41.260208', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.23764082789421082, 'timestamp': '2025-09-30 22:21:41.264775', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:41.321404', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.08959151804447174, 'timestamp': '2025-09-30 22:21:41.324483', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:41.385664', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.20840249955654144, 'timestamp': '2025-09-30 22:21:41.393309', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:41.453656', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.22902175784111023, 'timestamp': '2025-09-30 22:21:41.456992', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:41.519281', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.1131942868232727, 'timestamp': '2025-09-30 22:21:41.522546', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:41.593465', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.20300839841365814, 'timestamp': '2025-09-30 22:21:41.606915', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:41.675496', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.176356703042984, 'timestamp': '2025-09-30 22:21:41.681667', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:41.739216', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.1497802436351776, 'timestamp': '2025-09-30 22:21:41.742568', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:41.799470', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.2068294733762741, 'timestamp': '2025-09-30 22:21:41.802391', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:41.864956', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.11663208901882172, 'timestamp': '2025-09-30 22:21:41.868638', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:41.939742', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.19331349432468414, 'timestamp': '2025-09-30 22:21:41.946570', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:42.006016', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.058650385588407516, 'timestamp': '2025-09-30 22:21:42.008637', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:42.066555', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.1175651028752327, 'timestamp': '2025-09-30 22:21:42.069848', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:42.127266', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.11790075898170471, 'timestamp': '2025-09-30 22:21:42.133373', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:42.189912', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.12982484698295593, 'timestamp': '2025-09-30 22:21:42.198791', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:42.255669', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.14591924846172333, 'timestamp': '2025-09-30 22:21:42.259272', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.325937', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.09895700961351395, 'timestamp': '2025-09-30 22:21:42.328232', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.388480', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.33170872926712036, 'timestamp': '2025-09-30 22:21:42.395788', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:42.455809', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.15422146022319794, 'timestamp': '2025-09-30 22:21:42.464526', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.525368', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.17104865610599518, 'timestamp': '2025-09-30 22:21:42.528427', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:42.585833', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.0957503616809845, 'timestamp': '2025-09-30 22:21:42.602382', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:42.661434', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.04397158324718475, 'timestamp': '2025-09-30 22:21:42.664466', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:42.725075', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.13759294152259827, 'timestamp': '2025-09-30 22:21:42.734019', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.789561', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.20979757606983185, 'timestamp': '2025-09-30 22:21:42.795456', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.852710', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.19341342151165009, 'timestamp': '2025-09-30 22:21:42.858098', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.917452', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.21695297956466675, 'timestamp': '2025-09-30 22:21:42.921303', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:42.978850', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.0854315534234047, 'timestamp': '2025-09-30 22:21:42.985381', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:43.041439', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.12969595193862915, 'timestamp': '2025-09-30 22:21:43.045361', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.103322', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.1297065168619156, 'timestamp': '2025-09-30 22:21:43.106233', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.169679', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.18939906358718872, 'timestamp': '2025-09-30 22:21:43.173106', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:43.242524', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.2000025361776352, 'timestamp': '2025-09-30 22:21:43.249045', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:43.314113', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.10067901760339737, 'timestamp': '2025-09-30 22:21:43.320661', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.404047', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.16115175187587738, 'timestamp': '2025-09-30 22:21:43.408748', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:43.482639', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.24101415276527405, 'timestamp': '2025-09-30 22:21:43.490251', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:43.552572', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.1717558354139328, 'timestamp': '2025-09-30 22:21:43.564202', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:43.638438', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.18351544439792633, 'timestamp': '2025-09-30 22:21:43.641330', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.702802', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.10395295172929764, 'timestamp': '2025-09-30 22:21:43.706843', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.763161', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.13542303442955017, 'timestamp': '2025-09-30 22:21:43.765849', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.822242', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.18362967669963837, 'timestamp': '2025-09-30 22:21:43.828444', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:43.885528', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.23926520347595215, 'timestamp': '2025-09-30 22:21:43.900125', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:43.962859', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.21993018686771393, 'timestamp': '2025-09-30 22:21:43.965995', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:44.023237', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.19276759028434753, 'timestamp': '2025-09-30 22:21:44.028092', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:44.094522', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.157037153840065, 'timestamp': '2025-09-30 22:21:44.101632', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:44.163635', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.1853257566690445, 'timestamp': '2025-09-30 22:21:44.167161', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:44.223695', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.1274084448814392, 'timestamp': '2025-09-30 22:21:44.227558', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:44.291236', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.14065144956111908, 'timestamp': '2025-09-30 22:21:44.295173', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:44.362558', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.12479817867279053, 'timestamp': '2025-09-30 22:21:44.374689', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:44.446830', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.17021393775939941, 'timestamp': '2025-09-30 22:21:44.451882', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:44.541969', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.18810972571372986, 'timestamp': '2025-09-30 22:21:44.554034', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:44.620351', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.1874929815530777, 'timestamp': '2025-09-30 22:21:44.626358', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:44.684400', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.12208040803670883, 'timestamp': '2025-09-30 22:21:44.691161', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:44.748989', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.19038502871990204, 'timestamp': '2025-09-30 22:21:44.753905', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:44.812846', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.2407892793416977, 'timestamp': '2025-09-30 22:21:44.819139', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:44.877938', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.16264331340789795, 'timestamp': '2025-09-30 22:21:44.882773', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:44.940461', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.17728260159492493, 'timestamp': '2025-09-30 22:21:44.949411', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:45.006880', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.1818772852420807, 'timestamp': '2025-09-30 22:21:45.011778', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:21:45.082674', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.17956949770450592, 'timestamp': '2025-09-30 22:21:45.097037', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:45.155602', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.15685135126113892, 'timestamp': '2025-09-30 22:21:45.162077', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:45.224972', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.15452997386455536, 'timestamp': '2025-09-30 22:21:45.232427', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:45.302755', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.10491053014993668, 'timestamp': '2025-09-30 22:21:45.306094', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:45.363044', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.22542215883731842, 'timestamp': '2025-09-30 22:21:45.367283', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:45.425564', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.2262738049030304, 'timestamp': '2025-09-30 22:21:45.431368', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:45.490465', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.15524065494537354, 'timestamp': '2025-09-30 22:21:45.508267', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:45.565366', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.13118833303451538, 'timestamp': '2025-09-30 22:21:45.569698', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:45.637946', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.157220259308815, 'timestamp': '2025-09-30 22:21:45.642918', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:45.703756', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.09894455969333649, 'timestamp': '2025-09-30 22:21:45.707939', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:45.765212', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.178481325507164, 'timestamp': '2025-09-30 22:21:45.772560', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:21:45.837807', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.19162362813949585, 'timestamp': '2025-09-30 22:21:45.840872', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:45.900182', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.15473204851150513, 'timestamp': '2025-09-30 22:21:45.903934', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:45.970851', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.18042390048503876, 'timestamp': '2025-09-30 22:21:45.973918', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.040862', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.1534854769706726, 'timestamp': '2025-09-30 22:21:46.047848', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.123524', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.20652003586292267, 'timestamp': '2025-09-30 22:21:46.126819', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:46.188816', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.2668874263763428, 'timestamp': '2025-09-30 22:21:46.192329', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:46.257232', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.19493241608142853, 'timestamp': '2025-09-30 22:21:46.261139', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.321834', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.10546986758708954, 'timestamp': '2025-09-30 22:21:46.329145', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.387705', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.11513573676347733, 'timestamp': '2025-09-30 22:21:46.392272', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:46.450950', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.11595135182142258, 'timestamp': '2025-09-30 22:21:46.456019', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:46.520771', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.12894493341445923, 'timestamp': '2025-09-30 22:21:46.524786', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.589572', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.11666756868362427, 'timestamp': '2025-09-30 22:21:46.596782', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:46.653614', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.18321658670902252, 'timestamp': '2025-09-30 22:21:46.657218', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.719950', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.1936706304550171, 'timestamp': '2025-09-30 22:21:46.723833', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:46.797799', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.20554649829864502, 'timestamp': '2025-09-30 22:21:46.801425', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.860055', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.27096620202064514, 'timestamp': '2025-09-30 22:21:46.868924', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:46.936953', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.12968577444553375, 'timestamp': '2025-09-30 22:21:46.940526', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.002033', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.18710209429264069, 'timestamp': '2025-09-30 22:21:47.010641', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.076410', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.05975194647908211, 'timestamp': '2025-09-30 22:21:47.079316', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.137175', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.17806470394134521, 'timestamp': '2025-09-30 22:21:47.144132', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.213959', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.1696237474679947, 'timestamp': '2025-09-30 22:21:47.217268', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.276431', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.1501377671957016, 'timestamp': '2025-09-30 22:21:47.280984', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:47.347306', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.17814987897872925, 'timestamp': '2025-09-30 22:21:47.352024', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:47.409023', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.12962760031223297, 'timestamp': '2025-09-30 22:21:47.417813', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.474245', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.20964160561561584, 'timestamp': '2025-09-30 22:21:47.478386', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:47.538168', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.14705929160118103, 'timestamp': '2025-09-30 22:21:47.543251', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:47.601726', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.1338844746351242, 'timestamp': '2025-09-30 22:21:47.604634', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.669452', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.1299351453781128, 'timestamp': '2025-09-30 22:21:47.677444', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:47.735713', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.128133624792099, 'timestamp': '2025-09-30 22:21:47.737969', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:47.808722', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.12597279250621796, 'timestamp': '2025-09-30 22:21:47.811865', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:47.870197', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.15552835166454315, 'timestamp': '2025-09-30 22:21:47.874743', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:47.936682', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.18288087844848633, 'timestamp': '2025-09-30 22:21:47.944851', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:48.006886', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.12351687997579575, 'timestamp': '2025-09-30 22:21:48.010083', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:48.068226', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.2432226538658142, 'timestamp': '2025-09-30 22:21:48.071629', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:48.130235', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.11377640068531036, 'timestamp': '2025-09-30 22:21:48.140788', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:48.202669', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.18450205028057098, 'timestamp': '2025-09-30 22:21:48.210224', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:48.268585', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.17268948256969452, 'timestamp': '2025-09-30 22:21:48.279783', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:48.365343', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.1425262838602066, 'timestamp': '2025-09-30 22:21:48.385851', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:48.451332', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.16539615392684937, 'timestamp': '2025-09-30 22:21:48.465615', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:48.526162', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.1539650410413742, 'timestamp': '2025-09-30 22:21:48.544003', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:48.610796', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.1099899485707283, 'timestamp': '2025-09-30 22:21:48.620544', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:48.682394', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.05294980853796005, 'timestamp': '2025-09-30 22:21:48.708852', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:48.781226', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.15258954465389252, 'timestamp': '2025-09-30 22:21:48.791499', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:48.861916', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.1301853507757187, 'timestamp': '2025-09-30 22:21:48.873735', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:48.939127', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.13599522411823273, 'timestamp': '2025-09-30 22:21:48.944531', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:49.006125', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.17818334698677063, 'timestamp': '2025-09-30 22:21:49.016145', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:49.078183', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.12332642078399658, 'timestamp': '2025-09-30 22:21:49.087016', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.146103', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.09267135709524155, 'timestamp': '2025-09-30 22:21:49.171944', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.237395', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.14219892024993896, 'timestamp': '2025-09-30 22:21:49.241954', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:49.302767', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.23872219026088715, 'timestamp': '2025-09-30 22:21:49.312536', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.374797', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.17488937079906464, 'timestamp': '2025-09-30 22:21:49.378973', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.445771', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.20914149284362793, 'timestamp': '2025-09-30 22:21:49.454352', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.512042', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.09206661581993103, 'timestamp': '2025-09-30 22:21:49.522112', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:49.590653', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.22908242046833038, 'timestamp': '2025-09-30 22:21:49.595715', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:49.654973', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.09484543651342392, 'timestamp': '2025-09-30 22:21:49.659025', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:49.716048', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.20549547672271729, 'timestamp': '2025-09-30 22:21:49.731692', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:49.792423', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.10572043806314468, 'timestamp': '2025-09-30 22:21:49.801217', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:49.859103', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.19575482606887817, 'timestamp': '2025-09-30 22:21:49.862660', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:49.919561', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.05813192203640938, 'timestamp': '2025-09-30 22:21:49.923523', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:49.981114', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.12099836766719818, 'timestamp': '2025-09-30 22:21:49.987272', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.045964', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.1213267371058464, 'timestamp': '2025-09-30 22:21:50.049010', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.106804', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.19093230366706848, 'timestamp': '2025-09-30 22:21:50.110101', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:50.169100', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.16997754573822021, 'timestamp': '2025-09-30 22:21:50.171878', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.242039', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.21068353950977325, 'timestamp': '2025-09-30 22:21:50.249183', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:50.320619', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.09586713463068008, 'timestamp': '2025-09-30 22:21:50.323904', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:50.381352', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.1150786280632019, 'timestamp': '2025-09-30 22:21:50.383643', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:50.441541', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.19323374330997467, 'timestamp': '2025-09-30 22:21:50.444856', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.510950', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.1486397683620453, 'timestamp': '2025-09-30 22:21:50.519073', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:50.577869', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.1619400680065155, 'timestamp': '2025-09-30 22:21:50.582361', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:50.653556', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.12037859857082367, 'timestamp': '2025-09-30 22:21:50.657444', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:50.716829', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.29368165135383606, 'timestamp': '2025-09-30 22:21:50.722516', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.781469', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.10447705537080765, 'timestamp': '2025-09-30 22:21:50.788088', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:50.847775', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.13989627361297607, 'timestamp': '2025-09-30 22:21:50.851456', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.910856', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.20074377954006195, 'timestamp': '2025-09-30 22:21:50.914825', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:50.974459', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.1326151341199875, 'timestamp': '2025-09-30 22:21:50.977705', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:51.040003', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.16270042955875397, 'timestamp': '2025-09-30 22:21:51.047941', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:51.104804', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.11464676260948181, 'timestamp': '2025-09-30 22:21:51.108581', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:51.172446', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.16739045083522797, 'timestamp': '2025-09-30 22:21:51.177264', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:51.235331', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.09643454849720001, 'timestamp': '2025-09-30 22:21:51.238576', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.296224', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.12335158884525299, 'timestamp': '2025-09-30 22:21:51.302773', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:51.360103', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.2166179120540619, 'timestamp': '2025-09-30 22:21:51.371516', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.444193', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.1956215649843216, 'timestamp': '2025-09-30 22:21:51.448633', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.507781', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.12973666191101074, 'timestamp': '2025-09-30 22:21:51.512326', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:51.574342', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.23722881078720093, 'timestamp': '2025-09-30 22:21:51.582257', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.640764', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.1455877423286438, 'timestamp': '2025-09-30 22:21:51.645657', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:51.722669', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.22792960703372955, 'timestamp': '2025-09-30 22:21:51.727423', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.790419', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.22698557376861572, 'timestamp': '2025-09-30 22:21:51.793127', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:51.851068', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.12014324218034744, 'timestamp': '2025-09-30 22:21:51.861587', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:51.921187', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.20883402228355408, 'timestamp': '2025-09-30 22:21:51.924754', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:51.980925', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.12601228058338165, 'timestamp': '2025-09-30 22:21:51.983954', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.044734', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.09518478065729141, 'timestamp': '2025-09-30 22:21:52.048181', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:52.105602', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.15832796692848206, 'timestamp': '2025-09-30 22:21:52.113755', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:52.174970', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.14672811329364777, 'timestamp': '2025-09-30 22:21:52.178578', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:52.242210', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.19377635419368744, 'timestamp': '2025-09-30 22:21:52.245748', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:52.307373', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.12347720563411713, 'timestamp': '2025-09-30 22:21:52.310544', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:52.371899', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.18715591728687286, 'timestamp': '2025-09-30 22:21:52.379615', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.439745', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.09584152698516846, 'timestamp': '2025-09-30 22:21:52.444163', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.501322', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.09110080450773239, 'timestamp': '2025-09-30 22:21:52.506912', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.565053', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.19778737425804138, 'timestamp': '2025-09-30 22:21:52.568802', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.629715', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.10784020274877548, 'timestamp': '2025-09-30 22:21:52.637362', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.697181', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.1600734293460846, 'timestamp': '2025-09-30 22:21:52.703673', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.767633', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.18162241578102112, 'timestamp': '2025-09-30 22:21:52.773187', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:52.832116', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.23394735157489777, 'timestamp': '2025-09-30 22:21:52.835929', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.893554', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.09758573025465012, 'timestamp': '2025-09-30 22:21:52.901002', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:52.957867', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.15513314306735992, 'timestamp': '2025-09-30 22:21:52.961975', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:53.019260', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.12379253655672073, 'timestamp': '2025-09-30 22:21:53.022592', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:53.081128', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.12007173895835876, 'timestamp': '2025-09-30 22:21:53.084418', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:53.141389', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.17486076056957245, 'timestamp': '2025-09-30 22:21:53.158861', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:53.217241', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.16911780834197998, 'timestamp': '2025-09-30 22:21:53.230591', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:53.289223', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.1952742040157318, 'timestamp': '2025-09-30 22:21:53.293245', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:53.352595', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.23198240995407104, 'timestamp': '2025-09-30 22:21:53.358896', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:53.418012', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.09722360968589783, 'timestamp': '2025-09-30 22:21:53.425900', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:53.483788', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.10538946837186813, 'timestamp': '2025-09-30 22:21:53.497961', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:53.557472', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.10510165244340897, 'timestamp': '2025-09-30 22:21:53.561120', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:53.631824', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.2494562417268753, 'timestamp': '2025-09-30 22:21:53.644252', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:53.701765', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.0897890254855156, 'timestamp': '2025-09-30 22:21:53.709013', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:53.766869', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.19860349595546722, 'timestamp': '2025-09-30 22:21:53.771329', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:53.828561', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.1505751758813858, 'timestamp': '2025-09-30 22:21:53.831563', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:53.889328', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.1264198124408722, 'timestamp': '2025-09-30 22:21:53.893708', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:53.952119', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.16024211049079895, 'timestamp': '2025-09-30 22:21:53.958340', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:54.016404', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.18269406259059906, 'timestamp': '2025-09-30 22:21:54.019794', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:54.082139', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.13006728887557983, 'timestamp': '2025-09-30 22:21:54.085426', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:54.144435', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.1718655228614807, 'timestamp': '2025-09-30 22:21:54.156667', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:54.213921', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.149881049990654, 'timestamp': '2025-09-30 22:21:54.220581', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:54.277435', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.10957898944616318, 'timestamp': '2025-09-30 22:21:54.287845', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:54.346087', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.1356801837682724, 'timestamp': '2025-09-30 22:21:54.351241', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:54.410054', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.14915725588798523, 'timestamp': '2025-09-30 22:21:54.414631', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:54.472440', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.12853123247623444, 'timestamp': '2025-09-30 22:21:54.487655', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:54.545390', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.1412123143672943, 'timestamp': '2025-09-30 22:21:54.557480', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:54.619808', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.09997139871120453, 'timestamp': '2025-09-30 22:21:54.628315', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:54.687114', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.11691053956747055, 'timestamp': '2025-09-30 22:21:54.697186', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:54.755708', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.11915244907140732, 'timestamp': '2025-09-30 22:21:54.763769', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:54.822820', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.14479391276836395, 'timestamp': '2025-09-30 22:21:54.826946', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:54.893865', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.08803057670593262, 'timestamp': '2025-09-30 22:21:54.897526', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:54.966303', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.1377672404050827, 'timestamp': '2025-09-30 22:21:54.970221', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.028701', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.11770528554916382, 'timestamp': '2025-09-30 22:21:55.036860', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.093946', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.14490456879138947, 'timestamp': '2025-09-30 22:21:55.097758', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:55.155218', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.21019096672534943, 'timestamp': '2025-09-30 22:21:55.158775', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.221128', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.20272444188594818, 'timestamp': '2025-09-30 22:21:55.233460', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:55.321833', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.20951366424560547, 'timestamp': '2025-09-30 22:21:55.330386', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:55.390232', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.09535352885723114, 'timestamp': '2025-09-30 22:21:55.396755', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:55.462469', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.10934807360172272, 'timestamp': '2025-09-30 22:21:55.466593', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:55.541148', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.12187251448631287, 'timestamp': '2025-09-30 22:21:55.544244', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.603075', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.1324223279953003, 'timestamp': '2025-09-30 22:21:55.611170', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:55.668583', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.1791314035654068, 'timestamp': '2025-09-30 22:21:55.672921', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:55.732832', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.15189526975154877, 'timestamp': '2025-09-30 22:21:55.738039', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:55.798528', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.2539636492729187, 'timestamp': '2025-09-30 22:21:55.802211', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:55.862229', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.1912165731191635, 'timestamp': '2025-09-30 22:21:55.870258', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.927178', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.1079748123884201, 'timestamp': '2025-09-30 22:21:55.931559', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:55.992255', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.1339687556028366, 'timestamp': '2025-09-30 22:21:56.005888', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:56.064951', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.13552607595920563, 'timestamp': '2025-09-30 22:21:56.069146', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:56.128310', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.12203015387058258, 'timestamp': '2025-09-30 22:21:56.145283', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:56.203306', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.15378810465335846, 'timestamp': '2025-09-30 22:21:56.206930', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:56.288329', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.1446722149848938, 'timestamp': '2025-09-30 22:21:56.293051', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:56.365778', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.18559427559375763, 'timestamp': '2025-09-30 22:21:56.383036', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:56.446364', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.1631009578704834, 'timestamp': '2025-09-30 22:21:56.455662', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:56.518694', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.1598384976387024, 'timestamp': '2025-09-30 22:21:56.523014', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:56.585476', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.14510217308998108, 'timestamp': '2025-09-30 22:21:56.590838', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:56.657798', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.23293112218379974, 'timestamp': '2025-09-30 22:21:56.661272', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:56.740876', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.14537203311920166, 'timestamp': '2025-09-30 22:21:56.760873', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:56.819696', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.13673923909664154, 'timestamp': '2025-09-30 22:21:56.824999', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:56.887839', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.2078433334827423, 'timestamp': '2025-09-30 22:21:56.892927', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:56.953404', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.14899519085884094, 'timestamp': '2025-09-30 22:21:56.972607', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:57.031366', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.1342221200466156, 'timestamp': '2025-09-30 22:21:57.039674', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:57.104939', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.15357238054275513, 'timestamp': '2025-09-30 22:21:57.122287', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:57.193455', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.2931711673736572, 'timestamp': '2025-09-30 22:21:57.196763', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:57.256573', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.1338648796081543, 'timestamp': '2025-09-30 22:21:57.260068', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:57.319334', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.16103653609752655, 'timestamp': '2025-09-30 22:21:57.326655', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:57.402418', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.13209877908229828, 'timestamp': '2025-09-30 22:21:57.405987', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:57.464477', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.1271810382604599, 'timestamp': '2025-09-30 22:21:57.474804', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:57.535682', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.13406208157539368, 'timestamp': '2025-09-30 22:21:57.540547', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:21:57.600335', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.12899872660636902, 'timestamp': '2025-09-30 22:21:57.612860', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:57.683185', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.10571356862783432, 'timestamp': '2025-09-30 22:21:57.687177', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:57.744394', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.1085473895072937, 'timestamp': '2025-09-30 22:21:57.748542', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:57.806784', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.1457882970571518, 'timestamp': '2025-09-30 22:21:57.810330', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:57.868520', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.1686701625585556, 'timestamp': '2025-09-30 22:21:57.875539', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:57.942059', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.22633762657642365, 'timestamp': '2025-09-30 22:21:57.944649', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:58.006194', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.1013975664973259, 'timestamp': '2025-09-30 22:21:58.019283', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:58.089212', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.1565467119216919, 'timestamp': '2025-09-30 22:21:58.093494', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:58.173684', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.18941138684749603, 'timestamp': '2025-09-30 22:21:58.182464', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:58.251484', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.12791432440280914, 'timestamp': '2025-09-30 22:21:58.255734', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:58.317657', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.16237542033195496, 'timestamp': '2025-09-30 22:21:58.322335', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:58.383116', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.2210405170917511, 'timestamp': '2025-09-30 22:21:58.392230', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:58.450697', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.1613009124994278, 'timestamp': '2025-09-30 22:21:58.460304', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:58.519634', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.13340897858142853, 'timestamp': '2025-09-30 22:21:58.523946', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:58.581186', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.3040591776371002, 'timestamp': '2025-09-30 22:21:58.584012', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:58.643344', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.21584898233413696, 'timestamp': '2025-09-30 22:21:58.647422', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:58.707193', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.13400261104106903, 'timestamp': '2025-09-30 22:21:58.713523', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:58.770548', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.20610830187797546, 'timestamp': '2025-09-30 22:21:58.773231', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:58.831581', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.20006708800792694, 'timestamp': '2025-09-30 22:21:58.835474', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:21:58.895019', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.16536910831928253, 'timestamp': '2025-09-30 22:21:58.898136', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:58.957635', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.10114606469869614, 'timestamp': '2025-09-30 22:21:58.964604', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.022601', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.14303922653198242, 'timestamp': '2025-09-30 22:21:59.026072', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:59.083389', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.20014703273773193, 'timestamp': '2025-09-30 22:21:59.086535', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.145911', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.15229763090610504, 'timestamp': '2025-09-30 22:21:59.149181', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.207358', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.10981883853673935, 'timestamp': '2025-09-30 22:21:59.214041', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.271127', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.16037410497665405, 'timestamp': '2025-09-30 22:21:59.274416', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:59.343291', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.18014463782310486, 'timestamp': '2025-09-30 22:21:59.347963', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:59.405844', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.2052675187587738, 'timestamp': '2025-09-30 22:21:59.409379', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.466100', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.10347859561443329, 'timestamp': '2025-09-30 22:21:59.473593', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:21:59.530293', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.10526285320520401, 'timestamp': '2025-09-30 22:21:59.533415', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:59.594046', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.09393276274204254, 'timestamp': '2025-09-30 22:21:59.597452', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:21:59.656271', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.14427635073661804, 'timestamp': '2025-09-30 22:21:59.660350', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.722275', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.11005803197622299, 'timestamp': '2025-09-30 22:21:59.730406', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:59.788442', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.09736086428165436, 'timestamp': '2025-09-30 22:21:59.806162', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:21:59.865906', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.12744295597076416, 'timestamp': '2025-09-30 22:21:59.871883', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:21:59.935246', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.16498297452926636, 'timestamp': '2025-09-30 22:21:59.939344', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:21:59.996956', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.12504476308822632, 'timestamp': '2025-09-30 22:22:00.005430', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.062498', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.13749396800994873, 'timestamp': '2025-09-30 22:22:00.067356', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:00.130946', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.13848719000816345, 'timestamp': '2025-09-30 22:22:00.134257', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:00.196458', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.16943252086639404, 'timestamp': '2025-09-30 22:22:00.201007', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.257676', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.18034027516841888, 'timestamp': '2025-09-30 22:22:00.266381', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.326313', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.15422216057777405, 'timestamp': '2025-09-30 22:22:00.331025', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.389462', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.19620369374752045, 'timestamp': '2025-09-30 22:22:00.392683', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.457707', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.11907817423343658, 'timestamp': '2025-09-30 22:22:00.460776', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:00.518225', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.073113352060318, 'timestamp': '2025-09-30 22:22:00.527215', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:00.585864', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.07501593977212906, 'timestamp': '2025-09-30 22:22:00.589676', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:00.647111', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.13224990665912628, 'timestamp': '2025-09-30 22:22:00.649912', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.720398', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.10130525380373001, 'timestamp': '2025-09-30 22:22:00.723144', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:00.804775', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.14487943053245544, 'timestamp': '2025-09-30 22:22:00.811322', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:00.868725', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.11110947281122208, 'timestamp': '2025-09-30 22:22:00.870998', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:00.934738', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.09448953717947006, 'timestamp': '2025-09-30 22:22:00.937777', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:22:01.006230', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.16888223588466644, 'timestamp': '2025-09-30 22:22:01.012849', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.071594', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.18074113130569458, 'timestamp': '2025-09-30 22:22:01.078840', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.143347', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.17935378849506378, 'timestamp': '2025-09-30 22:22:01.148097', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.208422', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.18419034779071808, 'timestamp': '2025-09-30 22:22:01.210914', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.277276', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.15651065111160278, 'timestamp': '2025-09-30 22:22:01.281535', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:01.339769', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.1123453751206398, 'timestamp': '2025-09-30 22:22:01.348460', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.404740', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.18013276159763336, 'timestamp': '2025-09-30 22:22:01.408098', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.471514', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.11779869347810745, 'timestamp': '2025-09-30 22:22:01.474128', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.531709', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.149732306599617, 'timestamp': '2025-09-30 22:22:01.535409', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:01.599330', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.10192813724279404, 'timestamp': '2025-09-30 22:22:01.610462', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.674008', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.15867967903614044, 'timestamp': '2025-09-30 22:22:01.678779', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:01.736588', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.2331830859184265, 'timestamp': '2025-09-30 22:22:01.740133', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:01.807317', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.14362406730651855, 'timestamp': '2025-09-30 22:22:01.817468', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:01.878373', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.12796428799629211, 'timestamp': '2025-09-30 22:22:01.891109', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:01.952232', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.16150756180286407, 'timestamp': '2025-09-30 22:22:01.956186', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.018355', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.1384362131357193, 'timestamp': '2025-09-30 22:22:02.020820', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:02.088350', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.20453117787837982, 'timestamp': '2025-09-30 22:22:02.091427', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.156526', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.16898611187934875, 'timestamp': '2025-09-30 22:22:02.163931', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:02.222229', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.29772278666496277, 'timestamp': '2025-09-30 22:22:02.226243', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.284679', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.12585651874542236, 'timestamp': '2025-09-30 22:22:02.287812', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:02.347288', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.2058991938829422, 'timestamp': '2025-09-30 22:22:02.349968', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:02.408817', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.15711630880832672, 'timestamp': '2025-09-30 22:22:02.415770', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.472785', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.1793832927942276, 'timestamp': '2025-09-30 22:22:02.475732', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:02.533960', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.16556967794895172, 'timestamp': '2025-09-30 22:22:02.538734', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.595542', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.12448393553495407, 'timestamp': '2025-09-30 22:22:02.598933', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.658867', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.24620960652828217, 'timestamp': '2025-09-30 22:22:02.665330', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:02.724419', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.16722041368484497, 'timestamp': '2025-09-30 22:22:02.727227', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:02.791661', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.10793407261371613, 'timestamp': '2025-09-30 22:22:02.795837', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:02.854833', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.1310187131166458, 'timestamp': '2025-09-30 22:22:02.857193', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:02.914438', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.1886097639799118, 'timestamp': '2025-09-30 22:22:02.920396', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:02.977088', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.07089849561452866, 'timestamp': '2025-09-30 22:22:02.979660', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.036779', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.14740562438964844, 'timestamp': '2025-09-30 22:22:03.039267', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.095820', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.13469043374061584, 'timestamp': '2025-09-30 22:22:03.099449', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.157762', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13394448161125183, 'timestamp': '2025-09-30 22:22:03.165049', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:03.226499', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.2928828299045563, 'timestamp': '2025-09-30 22:22:03.238633', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:03.297248', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.17511457204818726, 'timestamp': '2025-09-30 22:22:03.300933', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:03.358759', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.18890739977359772, 'timestamp': '2025-09-30 22:22:03.361232', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.418684', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.1510617882013321, 'timestamp': '2025-09-30 22:22:03.424834', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.481913', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.21297551691532135, 'timestamp': '2025-09-30 22:22:03.486653', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.544255', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.26520416140556335, 'timestamp': '2025-09-30 22:22:03.546912', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.604411', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.10866432636976242, 'timestamp': '2025-09-30 22:22:03.607653', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.664545', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.1499636471271515, 'timestamp': '2025-09-30 22:22:03.672113', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.728939', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.13848145306110382, 'timestamp': '2025-09-30 22:22:03.732877', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.790656', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.08112628757953644, 'timestamp': '2025-09-30 22:22:03.793544', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:03.859684', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.19063323736190796, 'timestamp': '2025-09-30 22:22:03.862483', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.926827', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.10789458453655243, 'timestamp': '2025-09-30 22:22:03.932760', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:03.990692', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.11742767691612244, 'timestamp': '2025-09-30 22:22:04.003286', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:04.084605', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.2569711208343506, 'timestamp': '2025-09-30 22:22:04.087256', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:04.144555', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.20182564854621887, 'timestamp': '2025-09-30 22:22:04.148142', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:04.209463', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.12614713609218597, 'timestamp': '2025-09-30 22:22:04.215916', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:04.289205', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.1265878826379776, 'timestamp': '2025-09-30 22:22:04.291533', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:04.349758', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.2020362913608551, 'timestamp': '2025-09-30 22:22:04.354451', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:22:18.885937', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 11149.734152476056, 'timestamp': '2025-09-30 22:22:18.890833', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:18.955719', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.17205265164375305, 'timestamp': '2025-09-30 22:22:18.958912', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:19.022032', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.1706129014492035, 'timestamp': '2025-09-30 22:22:19.030346', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:19.087901', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.18412527441978455, 'timestamp': '2025-09-30 22:22:19.094725', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:19.154657', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.109848752617836, 'timestamp': '2025-09-30 22:22:19.157173', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:19.218171', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.23513156175613403, 'timestamp': '2025-09-30 22:22:19.220801', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:19.280114', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.09044656902551651, 'timestamp': '2025-09-30 22:22:19.287158', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:19.344325', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.13711808621883392, 'timestamp': '2025-09-30 22:22:19.347197', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:19.404255', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.21858611702919006, 'timestamp': '2025-09-30 22:22:19.407952', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:19.467132', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.12301183491945267, 'timestamp': '2025-09-30 22:22:19.475853', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:19.548246', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.21355651319026947, 'timestamp': '2025-09-30 22:22:19.558606', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-30 22:22:19.974146', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.033180', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.0929916724562645, 'timestamp': '2025-09-30 22:22:20.042214', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:20.110187', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.16188853979110718, 'timestamp': '2025-09-30 22:22:20.113151', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:20.170341', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.27545320987701416, 'timestamp': '2025-09-30 22:22:20.175947', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.234101', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.15434426069259644, 'timestamp': '2025-09-30 22:22:20.246563', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.306285', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.14207860827445984, 'timestamp': '2025-09-30 22:22:20.316909', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.377593', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.1309712678194046, 'timestamp': '2025-09-30 22:22:20.381108', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.448634', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.1458768993616104, 'timestamp': '2025-09-30 22:22:20.453025', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.520395', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.09587565064430237, 'timestamp': '2025-09-30 22:22:20.526647', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.588170', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.16795247793197632, 'timestamp': '2025-09-30 22:22:20.593048', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.652113', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.13351286947727203, 'timestamp': '2025-09-30 22:22:20.655467', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.722150', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.13741107285022736, 'timestamp': '2025-09-30 22:22:20.724569', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:20.781914', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.1477097123861313, 'timestamp': '2025-09-30 22:22:20.792287', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.851676', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.2386825680732727, 'timestamp': '2025-09-30 22:22:20.855733', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:20.924820', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.2253604233264923, 'timestamp': '2025-09-30 22:22:20.927742', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:20.984877', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.1744345724582672, 'timestamp': '2025-09-30 22:22:20.987707', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:21.045552', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.23460163176059723, 'timestamp': '2025-09-30 22:22:21.059233', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.121753', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.23270811140537262, 'timestamp': '2025-09-30 22:22:21.129986', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.187716', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.20247279107570648, 'timestamp': '2025-09-30 22:22:21.190102', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.259111', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.17724493145942688, 'timestamp': '2025-09-30 22:22:21.265893', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:21.325420', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.09910643845796585, 'timestamp': '2025-09-30 22:22:21.332334', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:21.397031', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.19185295701026917, 'timestamp': '2025-09-30 22:22:21.405264', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:21.468352', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.14308512210845947, 'timestamp': '2025-09-30 22:22:21.474732', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:21.538130', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.15612520277500153, 'timestamp': '2025-09-30 22:22:21.542478', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.607208', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.09282242506742477, 'timestamp': '2025-09-30 22:22:21.614282', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.678383', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.2170218676328659, 'timestamp': '2025-09-30 22:22:21.683595', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:21.743225', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.26403912901878357, 'timestamp': '2025-09-30 22:22:21.746220', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.808594', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.176274374127388, 'timestamp': '2025-09-30 22:22:21.811874', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:21.869157', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.170680969953537, 'timestamp': '2025-09-30 22:22:21.875950', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:21.933976', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.09966174513101578, 'timestamp': '2025-09-30 22:22:21.943486', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.001001', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.15269604325294495, 'timestamp': '2025-09-30 22:22:22.009108', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.066445', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.08008544147014618, 'timestamp': '2025-09-30 22:22:22.069143', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:22.132925', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.10586820542812347, 'timestamp': '2025-09-30 22:22:22.140138', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.198995', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.12759745121002197, 'timestamp': '2025-09-30 22:22:22.203452', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.261282', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.18410930037498474, 'timestamp': '2025-09-30 22:22:22.264238', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.331726', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.1094374880194664, 'timestamp': '2025-09-30 22:22:22.334345', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.391424', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.11227387934923172, 'timestamp': '2025-09-30 22:22:22.402728', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:22.464460', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.1699942797422409, 'timestamp': '2025-09-30 22:22:22.467761', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.527304', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.1404757797718048, 'timestamp': '2025-09-30 22:22:22.535001', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:22.596235', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.1346345841884613, 'timestamp': '2025-09-30 22:22:22.604282', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.666501', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.17325176298618317, 'timestamp': '2025-09-30 22:22:22.675345', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.732568', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.10847406834363937, 'timestamp': '2025-09-30 22:22:22.735820', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.794505', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.22525642812252045, 'timestamp': '2025-09-30 22:22:22.801839', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:22.860696', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.16552062332630157, 'timestamp': '2025-09-30 22:22:22.865891', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.930556', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.1472245305776596, 'timestamp': '2025-09-30 22:22:22.937367', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:22.996840', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.16013513505458832, 'timestamp': '2025-09-30 22:22:23.004656', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.068138', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.12262089550495148, 'timestamp': '2025-09-30 22:22:23.072691', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.134604', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.20869788527488708, 'timestamp': '2025-09-30 22:22:23.137532', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.200068', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.23759302496910095, 'timestamp': '2025-09-30 22:22:23.213296', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.275161', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.13523834943771362, 'timestamp': '2025-09-30 22:22:23.278344', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.337598', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.11731090396642685, 'timestamp': '2025-09-30 22:22:23.340628', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.398734', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.21238777041435242, 'timestamp': '2025-09-30 22:22:23.404735', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:23.463214', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.20376543700695038, 'timestamp': '2025-09-30 22:22:23.472209', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.541685', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.1635555922985077, 'timestamp': '2025-09-30 22:22:23.545401', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:23.603758', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.15656381845474243, 'timestamp': '2025-09-30 22:22:23.608026', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:23.671830', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.1727897822856903, 'timestamp': '2025-09-30 22:22:23.680870', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:23.744318', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.08641062676906586, 'timestamp': '2025-09-30 22:22:23.751169', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:23.808283', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.18366999924182892, 'timestamp': '2025-09-30 22:22:23.813850', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:23.876905', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.2170809507369995, 'timestamp': '2025-09-30 22:22:23.881861', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:23.941739', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.2557699978351593, 'timestamp': '2025-09-30 22:22:23.946483', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:24.010455', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.3174814283847809, 'timestamp': '2025-09-30 22:22:24.022773', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:24.102666', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.1409245878458023, 'timestamp': '2025-09-30 22:22:24.121264', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:24.183282', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.29677194356918335, 'timestamp': '2025-09-30 22:22:24.188482', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.252397', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.25805819034576416, 'timestamp': '2025-09-30 22:22:24.256120', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:24.317783', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.20984923839569092, 'timestamp': '2025-09-30 22:22:24.327317', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.384203', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.17318791151046753, 'timestamp': '2025-09-30 22:22:24.395316', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.457285', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.2024846225976944, 'timestamp': '2025-09-30 22:22:24.460369', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:24.518705', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.1949787437915802, 'timestamp': '2025-09-30 22:22:24.527859', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.585913', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.13501331210136414, 'timestamp': '2025-09-30 22:22:24.593257', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.649110', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.1583937704563141, 'timestamp': '2025-09-30 22:22:24.652918', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:24.714722', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.11739383637905121, 'timestamp': '2025-09-30 22:22:24.717208', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:24.785962', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.1984039545059204, 'timestamp': '2025-09-30 22:22:24.789265', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:24.849315', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.1523050218820572, 'timestamp': '2025-09-30 22:22:24.857969', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:24.915502', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.1343095302581787, 'timestamp': '2025-09-30 22:22:24.920035', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:24.977503', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.1676381379365921, 'timestamp': '2025-09-30 22:22:24.983235', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:25.048756', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.09028851985931396, 'timestamp': '2025-09-30 22:22:25.054905', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.117406', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09413191676139832, 'timestamp': '2025-09-30 22:22:25.124086', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:25.184332', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.09781826287508011, 'timestamp': '2025-09-30 22:22:25.190403', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:25.268404', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.16294975578784943, 'timestamp': '2025-09-30 22:22:25.271230', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.327899', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.19285644590854645, 'timestamp': '2025-09-30 22:22:25.331598', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:25.393088', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.08513174951076508, 'timestamp': '2025-09-30 22:22:25.398958', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.456889', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.15881657600402832, 'timestamp': '2025-09-30 22:22:25.460124', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.529852', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.158442884683609, 'timestamp': '2025-09-30 22:22:25.533308', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:25.594771', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.10898488759994507, 'timestamp': '2025-09-30 22:22:25.598424', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.656538', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.16278895735740662, 'timestamp': '2025-09-30 22:22:25.665294', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:25.724740', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.21678046882152557, 'timestamp': '2025-09-30 22:22:25.730730', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.795431', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.17842546105384827, 'timestamp': '2025-09-30 22:22:25.800151', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.862885', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.20531141757965088, 'timestamp': '2025-09-30 22:22:25.870406', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.928806', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.14380790293216705, 'timestamp': '2025-09-30 22:22:25.934938', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:25.995143', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.10702987015247345, 'timestamp': '2025-09-30 22:22:25.998780', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:26.062661', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.0672692134976387, 'timestamp': '2025-09-30 22:22:26.065789', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:26.124770', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.17274439334869385, 'timestamp': '2025-09-30 22:22:26.127558', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:26.185980', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.13013337552547455, 'timestamp': '2025-09-30 22:22:26.193063', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:26.257659', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.0997287854552269, 'timestamp': '2025-09-30 22:22:26.260636', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:26.318756', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.11704974621534348, 'timestamp': '2025-09-30 22:22:26.329869', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:26.388520', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.1978628933429718, 'timestamp': '2025-09-30 22:22:26.391240', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:26.452553', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.13780905306339264, 'timestamp': '2025-09-30 22:22:26.460608', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:26.543433', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.15614819526672363, 'timestamp': '2025-09-30 22:22:26.547367', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:26.604249', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.19589699804782867, 'timestamp': '2025-09-30 22:22:26.607907', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:26.666766', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.13295142352581024, 'timestamp': '2025-09-30 22:22:26.671737', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:26.730933', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.18505826592445374, 'timestamp': '2025-09-30 22:22:26.736990', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:26.799819', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.18276004493236542, 'timestamp': '2025-09-30 22:22:26.802790', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:26.860408', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.21872971951961517, 'timestamp': '2025-09-30 22:22:26.863774', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:26.927637', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.14789631962776184, 'timestamp': '2025-09-30 22:22:26.933698', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:26.996625', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.2013791799545288, 'timestamp': '2025-09-30 22:22:27.007278', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:27.065838', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.10511243343353271, 'timestamp': '2025-09-30 22:22:27.072733', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:27.130396', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.16042757034301758, 'timestamp': '2025-09-30 22:22:27.135725', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:27.202257', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.13051153719425201, 'timestamp': '2025-09-30 22:22:27.214361', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:27.282664', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.19451116025447845, 'timestamp': '2025-09-30 22:22:27.288763', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:27.362299', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.1231980174779892, 'timestamp': '2025-09-30 22:22:27.364857', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:27.432156', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.13855217397212982, 'timestamp': '2025-09-30 22:22:27.442157', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:27.501850', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.15592332184314728, 'timestamp': '2025-09-30 22:22:27.505011', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:27.578654', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.1883455216884613, 'timestamp': '2025-09-30 22:22:27.586157', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:27.658540', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.1515226811170578, 'timestamp': '2025-09-30 22:22:27.662937', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:27.731747', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.10398006439208984, 'timestamp': '2025-09-30 22:22:27.735196', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:27.795090', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.19058044254779816, 'timestamp': '2025-09-30 22:22:27.803945', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:27.879779', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.10604841262102127, 'timestamp': '2025-09-30 22:22:27.887693', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:27.951691', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.20315417647361755, 'timestamp': '2025-09-30 22:22:27.961868', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:28.021083', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.2358684241771698, 'timestamp': '2025-09-30 22:22:28.024429', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:28.086632', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.09806068986654282, 'timestamp': '2025-09-30 22:22:28.090444', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:28.147603', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.11795753985643387, 'timestamp': '2025-09-30 22:22:28.159975', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:28.241120', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.1542213261127472, 'timestamp': '2025-09-30 22:22:28.243704', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:28.315108', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.10731548815965652, 'timestamp': '2025-09-30 22:22:28.318305', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.377978', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.1458941400051117, 'timestamp': '2025-09-30 22:22:28.388883', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.449017', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.13957586884498596, 'timestamp': '2025-09-30 22:22:28.464806', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.526442', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.10301313549280167, 'timestamp': '2025-09-30 22:22:28.531117', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:28.589678', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.10673711448907852, 'timestamp': '2025-09-30 22:22:28.595490', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.674636', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.10562696307897568, 'timestamp': '2025-09-30 22:22:28.682532', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:28.746457', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.24940578639507294, 'timestamp': '2025-09-30 22:22:28.758925', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:28.823975', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.14745181798934937, 'timestamp': '2025-09-30 22:22:28.827632', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.886645', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.22423972189426422, 'timestamp': '2025-09-30 22:22:28.893817', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:28.968797', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.1032165139913559, 'timestamp': '2025-09-30 22:22:28.975145', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:29.035777', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.17297165095806122, 'timestamp': '2025-09-30 22:22:29.042589', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:29.101852', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.1611379086971283, 'timestamp': '2025-09-30 22:22:29.115535', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:29.175397', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.11496397852897644, 'timestamp': '2025-09-30 22:22:29.184064', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:29.250068', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.16552478075027466, 'timestamp': '2025-09-30 22:22:29.255401', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:29.331157', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.14986902475357056, 'timestamp': '2025-09-30 22:22:29.337664', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:29.397161', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.09405824542045593, 'timestamp': '2025-09-30 22:22:29.403613', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:29.475344', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.10069207102060318, 'timestamp': '2025-09-30 22:22:29.480456', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:29.542005', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.07726088166236877, 'timestamp': '2025-09-30 22:22:29.549205', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:29.628554', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.13040922582149506, 'timestamp': '2025-09-30 22:22:29.641539', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:29.703430', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.12717920541763306, 'timestamp': '2025-09-30 22:22:29.706329', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:29.768855', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09102047234773636, 'timestamp': '2025-09-30 22:22:29.771060', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:29.830505', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.2221292108297348, 'timestamp': '2025-09-30 22:22:29.833268', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:29.893830', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.16473114490509033, 'timestamp': '2025-09-30 22:22:29.905545', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:29.969611', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.10067261010408401, 'timestamp': '2025-09-30 22:22:29.972374', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:30.029467', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.1973869949579239, 'timestamp': '2025-09-30 22:22:30.032222', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:30.089166', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.0943564623594284, 'timestamp': '2025-09-30 22:22:30.093433', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:30.153229', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.08618427067995071, 'timestamp': '2025-09-30 22:22:30.163185', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:30.236092', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.19216085970401764, 'timestamp': '2025-09-30 22:22:30.242906', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:30.311497', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.12189876288175583, 'timestamp': '2025-09-30 22:22:30.314927', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:30.377006', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.12439771741628647, 'timestamp': '2025-09-30 22:22:30.380274', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:30.442013', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.21194641292095184, 'timestamp': '2025-09-30 22:22:30.452183', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:30.509806', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.11782076954841614, 'timestamp': '2025-09-30 22:22:30.513548', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:30.576180', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.13928988575935364, 'timestamp': '2025-09-30 22:22:30.578944', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:30.640990', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.10684439539909363, 'timestamp': '2025-09-30 22:22:30.648674', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:30.707996', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.1192953959107399, 'timestamp': '2025-09-30 22:22:30.714834', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:30.776606', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.13813841342926025, 'timestamp': '2025-09-30 22:22:30.780788', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:22:30.839006', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.20107078552246094, 'timestamp': '2025-09-30 22:22:30.841618', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:30.902251', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.20203757286071777, 'timestamp': '2025-09-30 22:22:30.904863', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:30.962952', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.10319723188877106, 'timestamp': '2025-09-30 22:22:30.970719', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:31.031731', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.2732633650302887, 'timestamp': '2025-09-30 22:22:31.034497', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:31.106625', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.1643029898405075, 'timestamp': '2025-09-30 22:22:31.109850', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:31.170723', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.1914113461971283, 'timestamp': '2025-09-30 22:22:31.181895', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:31.247341', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.14217010140419006, 'timestamp': '2025-09-30 22:22:31.254307', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:31.314538', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.1416340321302414, 'timestamp': '2025-09-30 22:22:31.317274', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:31.377155', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.10831378400325775, 'timestamp': '2025-09-30 22:22:31.380691', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:31.442333', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.25163698196411133, 'timestamp': '2025-09-30 22:22:31.444655', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:31.528775', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.14076083898544312, 'timestamp': '2025-09-30 22:22:31.539367', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:31.598616', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.19567972421646118, 'timestamp': '2025-09-30 22:22:31.601242', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:31.660752', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.10666339099407196, 'timestamp': '2025-09-30 22:22:31.664098', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:31.743082', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.12188275158405304, 'timestamp': '2025-09-30 22:22:31.747722', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:31.809892', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.20074591040611267, 'timestamp': '2025-09-30 22:22:31.818073', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:31.892326', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.17626774311065674, 'timestamp': '2025-09-30 22:22:31.895535', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:31.974945', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.15519049763679504, 'timestamp': '2025-09-30 22:22:31.978625', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:32.054402', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.13620524108409882, 'timestamp': '2025-09-30 22:22:32.057343', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:32.134298', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.16133137047290802, 'timestamp': '2025-09-30 22:22:32.141552', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:32.223193', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.17820478975772858, 'timestamp': '2025-09-30 22:22:32.226135', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:32.289580', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.11732008308172226, 'timestamp': '2025-09-30 22:22:32.292738', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:32.363243', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.09068730473518372, 'timestamp': '2025-09-30 22:22:32.365928', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:32.445062', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.14027075469493866, 'timestamp': '2025-09-30 22:22:32.452134', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:32.563417', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.21953417360782623, 'timestamp': '2025-09-30 22:22:32.566186', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:32.627209', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.14805282652378082, 'timestamp': '2025-09-30 22:22:32.630333', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:32.708306', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.15240725874900818, 'timestamp': '2025-09-30 22:22:32.717182', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:32.793452', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.20513112843036652, 'timestamp': '2025-09-30 22:22:32.799546', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:32.876636', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.17945146560668945, 'timestamp': '2025-09-30 22:22:32.880988', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:32.967418', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.10568588972091675, 'timestamp': '2025-09-30 22:22:32.969720', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.051110', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.17807568609714508, 'timestamp': '2025-09-30 22:22:33.054980', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.124236', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.14670494198799133, 'timestamp': '2025-09-30 22:22:33.131484', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:33.190867', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.2678120732307434, 'timestamp': '2025-09-30 22:22:33.194141', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:33.259588', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.11650657653808594, 'timestamp': '2025-09-30 22:22:33.270139', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.330592', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.1725243180990219, 'timestamp': '2025-09-30 22:22:33.335506', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.405971', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.18411119282245636, 'timestamp': '2025-09-30 22:22:33.412841', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:33.481242', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.19871672987937927, 'timestamp': '2025-09-30 22:22:33.484656', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.544503', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.15951046347618103, 'timestamp': '2025-09-30 22:22:33.547246', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:33.607001', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.18871358036994934, 'timestamp': '2025-09-30 22:22:33.612730', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:33.679702', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.16931846737861633, 'timestamp': '2025-09-30 22:22:33.686652', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:33.752263', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.21398626267910004, 'timestamp': '2025-09-30 22:22:33.759275', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:33.819801', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.07559766620397568, 'timestamp': '2025-09-30 22:22:33.823707', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:33.886881', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.12785542011260986, 'timestamp': '2025-09-30 22:22:33.889797', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:33.950393', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.12280929088592529, 'timestamp': '2025-09-30 22:22:33.956950', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:34.015621', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.06718292087316513, 'timestamp': '2025-09-30 22:22:34.018914', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:34.085218', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14631080627441406, 'timestamp': '2025-09-30 22:22:34.089169', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.147778', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.2500411570072174, 'timestamp': '2025-09-30 22:22:34.150633', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:34.216096', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.1922767460346222, 'timestamp': '2025-09-30 22:22:34.222176', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:34.292269', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.22705401480197906, 'timestamp': '2025-09-30 22:22:34.295103', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:34.353570', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.14415933191776276, 'timestamp': '2025-09-30 22:22:34.356993', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.417725', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.22887343168258667, 'timestamp': '2025-09-30 22:22:34.421313', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.481159', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.17053763568401337, 'timestamp': '2025-09-30 22:22:34.491343', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:34.551780', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.1770651936531067, 'timestamp': '2025-09-30 22:22:34.557107', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:34.616278', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.15170881152153015, 'timestamp': '2025-09-30 22:22:34.620104', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.692311', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.09606914222240448, 'timestamp': '2025-09-30 22:22:34.696868', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:34.759516', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.12945742905139923, 'timestamp': '2025-09-30 22:22:34.769489', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.828243', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.15311141312122345, 'timestamp': '2025-09-30 22:22:34.831052', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:34.896161', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.18659529089927673, 'timestamp': '2025-09-30 22:22:34.899061', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:34.960781', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.11256585270166397, 'timestamp': '2025-09-30 22:22:34.963745', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:35.021624', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.1312067061662674, 'timestamp': '2025-09-30 22:22:35.028352', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:35.093217', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.16907741129398346, 'timestamp': '2025-09-30 22:22:35.095898', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:35.158127', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.09131671488285065, 'timestamp': '2025-09-30 22:22:35.160158', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:35.223471', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.14184004068374634, 'timestamp': '2025-09-30 22:22:35.228315', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:35.304790', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.1521715223789215, 'timestamp': '2025-09-30 22:22:35.310939', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:35.375876', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.1680838167667389, 'timestamp': '2025-09-30 22:22:35.379461', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:35.438515', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.14373397827148438, 'timestamp': '2025-09-30 22:22:35.441438', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:35.507820', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.16424410045146942, 'timestamp': '2025-09-30 22:22:35.511495', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:35.582942', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.10605571419000626, 'timestamp': '2025-09-30 22:22:35.589452', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:35.660563', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.18727070093154907, 'timestamp': '2025-09-30 22:22:35.665172', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:35.740333', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.200047567486763, 'timestamp': '2025-09-30 22:22:35.753013', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:35.820313', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.15625464916229248, 'timestamp': '2025-09-30 22:22:35.823204', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:35.884667', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.10472097247838974, 'timestamp': '2025-09-30 22:22:35.902633', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:35.966956', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.13111355900764465, 'timestamp': '2025-09-30 22:22:35.970176', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:36.038700', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.12089308351278305, 'timestamp': '2025-09-30 22:22:36.044318', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.103084', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.17480957508087158, 'timestamp': '2025-09-30 22:22:36.105703', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.169481', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.09575965255498886, 'timestamp': '2025-09-30 22:22:36.176130', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.235238', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.13122719526290894, 'timestamp': '2025-09-30 22:22:36.240787', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.309735', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.09049613028764725, 'timestamp': '2025-09-30 22:22:36.315001', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.380078', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.08565615862607956, 'timestamp': '2025-09-30 22:22:36.383982', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:36.456768', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.1183529868721962, 'timestamp': '2025-09-30 22:22:36.463770', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.522828', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.13093201816082, 'timestamp': '2025-09-30 22:22:36.525932', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.596172', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.08263304829597473, 'timestamp': '2025-09-30 22:22:36.599402', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:36.660999', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.13517148792743683, 'timestamp': '2025-09-30 22:22:36.663794', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:36.727239', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.2006630152463913, 'timestamp': '2025-09-30 22:22:36.733601', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:36.806870', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.13884316384792328, 'timestamp': '2025-09-30 22:22:36.814158', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:36.873033', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.07251113653182983, 'timestamp': '2025-09-30 22:22:36.875626', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:36.934733', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.20559091866016388, 'timestamp': '2025-09-30 22:22:36.937479', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:36.995335', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.11638445407152176, 'timestamp': '2025-09-30 22:22:37.001708', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.068481', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.19606776535511017, 'timestamp': '2025-09-30 22:22:37.070926', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.128200', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.16660137474536896, 'timestamp': '2025-09-30 22:22:37.134899', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:37.206452', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.10417668521404266, 'timestamp': '2025-09-30 22:22:37.209647', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.269808', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.23022638261318207, 'timestamp': '2025-09-30 22:22:37.283067', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:37.342253', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.11265014857053757, 'timestamp': '2025-09-30 22:22:37.353660', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:37.412746', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.1101144403219223, 'timestamp': '2025-09-30 22:22:37.415538', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.477278', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.11516335606575012, 'timestamp': '2025-09-30 22:22:37.481362', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.543530', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.11278486251831055, 'timestamp': '2025-09-30 22:22:37.550067', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:37.610560', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.1900552213191986, 'timestamp': '2025-09-30 22:22:37.613333', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:37.674531', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.13672080636024475, 'timestamp': '2025-09-30 22:22:37.678050', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:37.740247', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.10053730756044388, 'timestamp': '2025-09-30 22:22:37.744761', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.808504', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.17208817601203918, 'timestamp': '2025-09-30 22:22:37.816423', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:37.873720', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.1167009249329567, 'timestamp': '2025-09-30 22:22:37.879746', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:37.943217', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.2070380598306656, 'timestamp': '2025-09-30 22:22:37.953103', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:38.035661', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.17172664403915405, 'timestamp': '2025-09-30 22:22:38.043472', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.115198', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.2424195110797882, 'timestamp': '2025-09-30 22:22:38.121541', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:38.180693', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.2984703481197357, 'timestamp': '2025-09-30 22:22:38.183460', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:38.244625', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.11231940239667892, 'timestamp': '2025-09-30 22:22:38.250730', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.309498', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.17550012469291687, 'timestamp': '2025-09-30 22:22:38.312012', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.375970', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.12837618589401245, 'timestamp': '2025-09-30 22:22:38.382023', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.441852', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.22616364061832428, 'timestamp': '2025-09-30 22:22:38.452649', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:38.527987', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.1464940905570984, 'timestamp': '2025-09-30 22:22:38.530498', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.603124', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.06819026172161102, 'timestamp': '2025-09-30 22:22:38.606642', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.669723', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.12506206333637238, 'timestamp': '2025-09-30 22:22:38.675896', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:38.738311', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.25392481684684753, 'timestamp': '2025-09-30 22:22:38.742936', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:38.808503', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.1542707234621048, 'timestamp': '2025-09-30 22:22:38.811313', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:38.873526', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.19373852014541626, 'timestamp': '2025-09-30 22:22:38.877164', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:38.947941', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.15217217803001404, 'timestamp': '2025-09-30 22:22:38.954494', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:39.032299', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.15128374099731445, 'timestamp': '2025-09-30 22:22:39.043580', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:39.103654', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.138045534491539, 'timestamp': '2025-09-30 22:22:39.106333', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.164139', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.07877419143915176, 'timestamp': '2025-09-30 22:22:39.166879', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:39.229820', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.19989372789859772, 'timestamp': '2025-09-30 22:22:39.240997', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.312314', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.09683181345462799, 'timestamp': '2025-09-30 22:22:39.316110', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:39.376208', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.22006741166114807, 'timestamp': '2025-09-30 22:22:39.382737', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:39.440860', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.12317828834056854, 'timestamp': '2025-09-30 22:22:39.443721', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:39.503076', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.24465182423591614, 'timestamp': '2025-09-30 22:22:39.509166', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.572214', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.27774304151535034, 'timestamp': '2025-09-30 22:22:39.576249', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:39.636295', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.1270570009946823, 'timestamp': '2025-09-30 22:22:39.639198', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.696468', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.15944534540176392, 'timestamp': '2025-09-30 22:22:39.700959', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.759269', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.07675179094076157, 'timestamp': '2025-09-30 22:22:39.769616', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:39.826190', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.07097729295492172, 'timestamp': '2025-09-30 22:22:39.833048', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:39.891881', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.12938958406448364, 'timestamp': '2025-09-30 22:22:39.894173', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:39.953059', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.11098416149616241, 'timestamp': '2025-09-30 22:22:39.955540', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.014584', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.1333603858947754, 'timestamp': '2025-09-30 22:22:40.032300', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:40.091126', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.10290425270795822, 'timestamp': '2025-09-30 22:22:40.094192', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.155606', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.17716462910175323, 'timestamp': '2025-09-30 22:22:40.158896', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:40.218041', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.2157284915447235, 'timestamp': '2025-09-30 22:22:40.221015', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.279317', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.18741652369499207, 'timestamp': '2025-09-30 22:22:40.291609', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.368019', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.1651356965303421, 'timestamp': '2025-09-30 22:22:40.371949', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.432001', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.11038485914468765, 'timestamp': '2025-09-30 22:22:40.434781', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.492638', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.16072925925254822, 'timestamp': '2025-09-30 22:22:40.495428', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.559983', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.07549818605184555, 'timestamp': '2025-09-30 22:22:40.567007', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:40.625937', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.14867272973060608, 'timestamp': '2025-09-30 22:22:40.630673', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.690380', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.15427938103675842, 'timestamp': '2025-09-30 22:22:40.696880', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.770822', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.09734780341386795, 'timestamp': '2025-09-30 22:22:40.776008', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:40.844621', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.11617903411388397, 'timestamp': '2025-09-30 22:22:40.851572', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:40.911404', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.1601509153842926, 'timestamp': '2025-09-30 22:22:40.918858', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:40.990381', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.16214655339717865, 'timestamp': '2025-09-30 22:22:40.993735', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:41.058083', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.1536940485239029, 'timestamp': '2025-09-30 22:22:41.061312', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:41.127268', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.11342000961303711, 'timestamp': '2025-09-30 22:22:41.134442', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:41.194829', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.18378221988677979, 'timestamp': '2025-09-30 22:22:41.200726', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:41.270313', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.2055967152118683, 'timestamp': '2025-09-30 22:22:41.278964', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:41.343226', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.15422147512435913, 'timestamp': '2025-09-30 22:22:41.345744', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:41.413767', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.15205925703048706, 'timestamp': '2025-09-30 22:22:41.420747', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:41.486702', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.17965684831142426, 'timestamp': '2025-09-30 22:22:41.490442', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:41.551212', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.19732671976089478, 'timestamp': '2025-09-30 22:22:41.561738', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:41.637250', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.1382431536912918, 'timestamp': '2025-09-30 22:22:41.639635', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:41.702439', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.20331431925296783, 'timestamp': '2025-09-30 22:22:41.708676', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:41.770196', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.1619950830936432, 'timestamp': '2025-09-30 22:22:41.780169', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:41.840173', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.19406574964523315, 'timestamp': '2025-09-30 22:22:41.843492', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:41.904900', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.2051645964384079, 'timestamp': '2025-09-30 22:22:41.907850', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:41.975703', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.11451248824596405, 'timestamp': '2025-09-30 22:22:41.982432', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:42.041102', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.18950295448303223, 'timestamp': '2025-09-30 22:22:42.044316', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:42.102977', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.20502088963985443, 'timestamp': '2025-09-30 22:22:42.106470', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.177182', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.13469241559505463, 'timestamp': '2025-09-30 22:22:42.180405', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.243587', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.17270854115486145, 'timestamp': '2025-09-30 22:22:42.256069', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.314509', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.249373197555542, 'timestamp': '2025-09-30 22:22:42.324432', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:42.398769', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.20022651553153992, 'timestamp': '2025-09-30 22:22:42.402482', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.470738', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.16087408363819122, 'timestamp': '2025-09-30 22:22:42.477915', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:42.549454', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.09933855384588242, 'timestamp': '2025-09-30 22:22:42.555854', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:42.633104', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.11683733016252518, 'timestamp': '2025-09-30 22:22:42.637185', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:42.699109', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.17571327090263367, 'timestamp': '2025-09-30 22:22:42.704228', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:42.766525', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.12227939069271088, 'timestamp': '2025-09-30 22:22:42.771447', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.845701', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.15334321558475494, 'timestamp': '2025-09-30 22:22:42.855672', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:42.921435', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.1246199905872345, 'timestamp': '2025-09-30 22:22:42.924301', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:42.989274', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.2056502401828766, 'timestamp': '2025-09-30 22:22:42.992779', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.072638', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.1120244562625885, 'timestamp': '2025-09-30 22:22:43.077109', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:43.141998', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.18036779761314392, 'timestamp': '2025-09-30 22:22:43.149017', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.211910', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.12705416977405548, 'timestamp': '2025-09-30 22:22:43.214151', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:43.278586', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.09841065853834152, 'timestamp': '2025-09-30 22:22:43.289867', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:43.348518', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.10924360156059265, 'timestamp': '2025-09-30 22:22:43.351721', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:43.416685', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.18881279230117798, 'timestamp': '2025-09-30 22:22:43.425996', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.491676', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.21595942974090576, 'timestamp': '2025-09-30 22:22:43.498736', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.566772', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.12062261998653412, 'timestamp': '2025-09-30 22:22:43.571681', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:43.636133', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.19915077090263367, 'timestamp': '2025-09-30 22:22:43.639809', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.699136', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.22038690745830536, 'timestamp': '2025-09-30 22:22:43.708584', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:43.779868', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.20682063698768616, 'timestamp': '2025-09-30 22:22:43.783853', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.848591', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.16935773193836212, 'timestamp': '2025-09-30 22:22:43.859460', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:43.927119', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.11463922262191772, 'timestamp': '2025-09-30 22:22:43.929599', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:44.008293', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.27250075340270996, 'timestamp': '2025-09-30 22:22:44.021300', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:44.090403', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.13399429619312286, 'timestamp': '2025-09-30 22:22:44.096534', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:44.156833', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.13717025518417358, 'timestamp': '2025-09-30 22:22:44.160442', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:44.225499', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.15468207001686096, 'timestamp': '2025-09-30 22:22:44.230446', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:44.289757', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.1403810977935791, 'timestamp': '2025-09-30 22:22:44.296428', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:44.362556', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.06507668644189835, 'timestamp': '2025-09-30 22:22:44.370036', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:44.428801', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.11233749240636826, 'timestamp': '2025-09-30 22:22:44.436933', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:44.508981', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.12052122503519058, 'timestamp': '2025-09-30 22:22:44.511661', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:44.569159', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.16040153801441193, 'timestamp': '2025-09-30 22:22:44.575745', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:44.636040', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.17034727334976196, 'timestamp': '2025-09-30 22:22:44.640697', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:44.715972', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.1513453871011734, 'timestamp': '2025-09-30 22:22:44.719289', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:44.790525', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.23446685075759888, 'timestamp': '2025-09-30 22:22:44.798850', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:44.857130', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.10743632912635803, 'timestamp': '2025-09-30 22:22:44.865420', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:44.936947', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.19491279125213623, 'timestamp': '2025-09-30 22:22:44.941822', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:45.012950', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.10224010795354843, 'timestamp': '2025-09-30 22:22:45.017478', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.083633', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.16540096700191498, 'timestamp': '2025-09-30 22:22:45.085966', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:45.144356', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.12396981567144394, 'timestamp': '2025-09-30 22:22:45.153349', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:45.215924', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.19019633531570435, 'timestamp': '2025-09-30 22:22:45.220735', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.282960', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.10328427702188492, 'timestamp': '2025-09-30 22:22:45.286986', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:45.362175', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.08467163890600204, 'timestamp': '2025-09-30 22:22:45.365097', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.425750', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.27559053897857666, 'timestamp': '2025-09-30 22:22:45.433205', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.488665', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.22203491628170013, 'timestamp': '2025-09-30 22:22:45.492727', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.563612', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.09884835034608841, 'timestamp': '2025-09-30 22:22:45.567138', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:45.624803', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.1224847212433815, 'timestamp': '2025-09-30 22:22:45.627598', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:45.691077', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.2209344506263733, 'timestamp': '2025-09-30 22:22:45.699250', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:45.759803', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.1616152822971344, 'timestamp': '2025-09-30 22:22:45.763699', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:45.822847', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.1286352127790451, 'timestamp': '2025-09-30 22:22:45.830280', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:22:45.892155', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.14796379208564758, 'timestamp': '2025-09-30 22:22:45.895527', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:45.954694', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.15250049531459808, 'timestamp': '2025-09-30 22:22:45.962977', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.025287', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.17474296689033508, 'timestamp': '2025-09-30 22:22:46.027909', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:46.101312', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.18622712790966034, 'timestamp': '2025-09-30 22:22:46.107742', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.169503', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.14523327350616455, 'timestamp': '2025-09-30 22:22:46.173039', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:46.238590', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.2126712203025818, 'timestamp': '2025-09-30 22:22:46.244580', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:46.307647', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.196175217628479, 'timestamp': '2025-09-30 22:22:46.310169', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:46.366577', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.14864389598369598, 'timestamp': '2025-09-30 22:22:46.369638', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.430914', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.13891610503196716, 'timestamp': '2025-09-30 22:22:46.433295', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:46.492099', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.14153052866458893, 'timestamp': '2025-09-30 22:22:46.499251', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.561235', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.20178230106830597, 'timestamp': '2025-09-30 22:22:46.564202', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:46.626201', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.19447556138038635, 'timestamp': '2025-09-30 22:22:46.633287', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.703228', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.15872010588645935, 'timestamp': '2025-09-30 22:22:46.706443', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.776066', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.12752887606620789, 'timestamp': '2025-09-30 22:22:46.790447', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:46.866280', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.242035374045372, 'timestamp': '2025-09-30 22:22:46.869870', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:46.935310', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.12163939327001572, 'timestamp': '2025-09-30 22:22:46.939026', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:47.015088', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.0790192186832428, 'timestamp': '2025-09-30 22:22:47.017991', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.080183', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11311347037553787, 'timestamp': '2025-09-30 22:22:47.086981', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.164795', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.24148060381412506, 'timestamp': '2025-09-30 22:22:47.167452', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.229965', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.14858970046043396, 'timestamp': '2025-09-30 22:22:47.232880', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.296803', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.23603300750255585, 'timestamp': '2025-09-30 22:22:47.304788', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:47.383374', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.15346801280975342, 'timestamp': '2025-09-30 22:22:47.389839', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:47.451738', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.1872006207704544, 'timestamp': '2025-09-30 22:22:47.459381', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.516721', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.17640428245067596, 'timestamp': '2025-09-30 22:22:47.519873', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:47.577058', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.12984080612659454, 'timestamp': '2025-09-30 22:22:47.585522', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:47.679088', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.12734124064445496, 'timestamp': '2025-09-30 22:22:47.688944', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:47.745889', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.1280113011598587, 'timestamp': '2025-09-30 22:22:47.749879', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:47.828521', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.12834888696670532, 'timestamp': '2025-09-30 22:22:47.839004', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:47.906479', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.13844874501228333, 'timestamp': '2025-09-30 22:22:47.909619', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:47.979861', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.12596336007118225, 'timestamp': '2025-09-30 22:22:47.986629', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:48.060768', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.24440424144268036, 'timestamp': '2025-09-30 22:22:48.070451', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.153712', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.1858706921339035, 'timestamp': '2025-09-30 22:22:48.156567', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:48.215565', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.1699400544166565, 'timestamp': '2025-09-30 22:22:48.218314', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.284721', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.17474931478500366, 'timestamp': '2025-09-30 22:22:48.290506', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:48.369415', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.10602059215307236, 'timestamp': '2025-09-30 22:22:48.374635', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:48.460529', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.15782709419727325, 'timestamp': '2025-09-30 22:22:48.463524', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.542862', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.18571656942367554, 'timestamp': '2025-09-30 22:22:48.548733', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.626585', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.3248090445995331, 'timestamp': '2025-09-30 22:22:48.633782', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:48.691818', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.22959987819194794, 'timestamp': '2025-09-30 22:22:48.696726', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.754850', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.11384019255638123, 'timestamp': '2025-09-30 22:22:48.757937', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:48.828117', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.08710763603448868, 'timestamp': '2025-09-30 22:22:48.837401', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:48.894667', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11534950882196426, 'timestamp': '2025-09-30 22:22:48.900639', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:48.968640', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.20546585321426392, 'timestamp': '2025-09-30 22:22:48.976382', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:49.050113', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.07306358218193054, 'timestamp': '2025-09-30 22:22:49.053949', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.115837', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.05939651280641556, 'timestamp': '2025-09-30 22:22:49.119765', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:49.181793', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.12227340042591095, 'timestamp': '2025-09-30 22:22:49.190257', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:49.287096', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.25217607617378235, 'timestamp': '2025-09-30 22:22:49.290936', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:49.352640', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.06425465643405914, 'timestamp': '2025-09-30 22:22:49.355869', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.443186', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.17882592976093292, 'timestamp': '2025-09-30 22:22:49.448166', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:49.508844', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.17911143600940704, 'timestamp': '2025-09-30 22:22:49.517302', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:49.576806', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.06408946961164474, 'timestamp': '2025-09-30 22:22:49.585893', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.645553', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.16418281197547913, 'timestamp': '2025-09-30 22:22:49.649726', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.717246', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.1538536697626114, 'timestamp': '2025-09-30 22:22:49.722120', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.782791', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.1154218390583992, 'timestamp': '2025-09-30 22:22:49.791307', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.849870', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.25620603561401367, 'timestamp': '2025-09-30 22:22:49.855738', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:49.921506', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.13341380655765533, 'timestamp': '2025-09-30 22:22:49.927131', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:49.990082', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.11509411036968231, 'timestamp': '2025-09-30 22:22:49.993152', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:50.049541', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.08353325724601746, 'timestamp': '2025-09-30 22:22:50.059609', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:50.123676', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.18240880966186523, 'timestamp': '2025-09-30 22:22:50.126850', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:50.207108', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.13628245890140533, 'timestamp': '2025-09-30 22:22:50.209875', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:50.272179', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.2172199934720993, 'timestamp': '2025-09-30 22:22:50.274538', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:50.335582', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.18370766937732697, 'timestamp': '2025-09-30 22:22:50.341893', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:50.406460', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.1553775519132614, 'timestamp': '2025-09-30 22:22:50.412673', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:22:50.475195', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.17103788256645203, 'timestamp': '2025-09-30 22:22:50.485126', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:50.542285', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.14871349930763245, 'timestamp': '2025-09-30 22:22:50.544308', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:50.601647', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.14158158004283905, 'timestamp': '2025-09-30 22:22:50.607797', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:50.663610', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.1583019345998764, 'timestamp': '2025-09-30 22:22:50.666172', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:50.737102', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.0729793831706047, 'timestamp': '2025-09-30 22:22:50.739476', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:50.796058', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.1444123387336731, 'timestamp': '2025-09-30 22:22:50.798384', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:50.854729', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.20478928089141846, 'timestamp': '2025-09-30 22:22:50.861255', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:50.923510', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.1561122089624405, 'timestamp': '2025-09-30 22:22:50.927563', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:50.989559', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.13785018026828766, 'timestamp': '2025-09-30 22:22:50.992765', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:22:51.052640', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.2627735137939453, 'timestamp': '2025-09-30 22:22:51.055426', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:51.126169', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.07364370673894882, 'timestamp': '2025-09-30 22:22:51.132194', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.194233', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.16041338443756104, 'timestamp': '2025-09-30 22:22:51.196430', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.271694', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.1363837867975235, 'timestamp': '2025-09-30 22:22:51.274193', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.331457', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.23340514302253723, 'timestamp': '2025-09-30 22:22:51.333869', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.419477', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.17195183038711548, 'timestamp': '2025-09-30 22:22:51.428371', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:22:51.490368', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.13511872291564941, 'timestamp': '2025-09-30 22:22:51.493805', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.552717', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.145600363612175, 'timestamp': '2025-09-30 22:22:51.557902', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:51.619576', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.18748749792575836, 'timestamp': '2025-09-30 22:22:51.622470', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.688781', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.1253645271062851, 'timestamp': '2025-09-30 22:22:51.695495', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:22:51.754727', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.11510420590639114, 'timestamp': '2025-09-30 22:22:51.757120', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:22:51.819055', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.06416689604520798, 'timestamp': '2025-09-30 22:22:51.824938', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:22:51.891650', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.39850476384162903, 'timestamp': '2025-09-30 22:22:51.895165', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:22:51.951672', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.14693251252174377, 'timestamp': '2025-09-30 22:22:51.958100', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:23:09.114109', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 12337.656987004371, 'timestamp': '2025-09-30 22:23:09.128201', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.187067', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.15622387826442719, 'timestamp': '2025-09-30 22:23:09.191332', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.249841', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.11985864490270615, 'timestamp': '2025-09-30 22:23:09.258910', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:09.318380', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.07670111954212189, 'timestamp': '2025-09-30 22:23:09.321807', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-30 22:23:09.408133', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.3343338072299957, 'timestamp': '2025-09-30 22:23:09.414585', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.475624', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.07377415895462036, 'timestamp': '2025-09-30 22:23:09.480719', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.542833', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.10814649611711502, 'timestamp': '2025-09-30 22:23:09.545805', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:09.604232', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.14370287954807281, 'timestamp': '2025-09-30 22:23:09.607202', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:09.673671', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.13030515611171722, 'timestamp': '2025-09-30 22:23:09.682805', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:09.742515', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.20416994392871857, 'timestamp': '2025-09-30 22:23:09.753162', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.812967', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.09351005405187607, 'timestamp': '2025-09-30 22:23:09.817093', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.875102', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.1659967005252838, 'timestamp': '2025-09-30 22:23:09.884732', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:09.959865', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.09385473281145096, 'timestamp': '2025-09-30 22:23:09.966798', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:10.027524', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.13866135478019714, 'timestamp': '2025-09-30 22:23:10.030570', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.089092', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.14764200150966644, 'timestamp': '2025-09-30 22:23:10.093908', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.156577', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.11881338059902191, 'timestamp': '2025-09-30 22:23:10.168134', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:10.227546', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.15411999821662903, 'timestamp': '2025-09-30 22:23:10.240049', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:10.301382', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.11916911602020264, 'timestamp': '2025-09-30 22:23:10.304069', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:10.363833', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.11768665164709091, 'timestamp': '2025-09-30 22:23:10.366424', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:10.426602', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.1289546638727188, 'timestamp': '2025-09-30 22:23:10.434738', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:10.499061', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.20225876569747925, 'timestamp': '2025-09-30 22:23:10.508365', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.572503', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.10222774744033813, 'timestamp': '2025-09-30 22:23:10.575269', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.633514', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.17515769600868225, 'timestamp': '2025-09-30 22:23:10.638483', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:10.728012', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.23799239099025726, 'timestamp': '2025-09-30 22:23:10.731121', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.799959', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.09372387826442719, 'timestamp': '2025-09-30 22:23:10.810911', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:10.885303', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.09265732020139694, 'timestamp': '2025-09-30 22:23:10.898143', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:10.969018', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.1446910947561264, 'timestamp': '2025-09-30 22:23:10.972489', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:11.030402', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.07543208450078964, 'timestamp': '2025-09-30 22:23:11.033473', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:11.091710', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.18289656937122345, 'timestamp': '2025-09-30 22:23:11.098122', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:11.181547', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.0720670148730278, 'timestamp': '2025-09-30 22:23:11.184974', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:11.256604', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.14258311688899994, 'timestamp': '2025-09-30 22:23:11.267376', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:11.338348', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.2382989227771759, 'timestamp': '2025-09-30 22:23:11.341548', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:11.408872', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.1329611837863922, 'timestamp': '2025-09-30 22:23:11.415841', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:11.476752', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.12806712090969086, 'timestamp': '2025-09-30 22:23:11.481538', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:11.589109', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.06727715581655502, 'timestamp': '2025-09-30 22:23:11.592586', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:11.658847', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.1754271388053894, 'timestamp': '2025-09-30 22:23:11.666847', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:11.724851', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.09836970269680023, 'timestamp': '2025-09-30 22:23:11.731498', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:11.788124', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.24740026891231537, 'timestamp': '2025-09-30 22:23:11.791332', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:11.848599', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.14059032499790192, 'timestamp': '2025-09-30 22:23:11.857917', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:11.923269', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.2260526418685913, 'timestamp': '2025-09-30 22:23:11.926898', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:12.011446', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.08854664117097855, 'timestamp': '2025-09-30 22:23:12.018395', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:12.076406', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.16085202991962433, 'timestamp': '2025-09-30 22:23:12.080356', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:12.141169', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.11733643710613251, 'timestamp': '2025-09-30 22:23:12.148898', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:12.209263', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.17675736546516418, 'timestamp': '2025-09-30 22:23:12.219217', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:12.280540', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.15177202224731445, 'timestamp': '2025-09-30 22:23:12.286629', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-09-30 22:23:12.707721', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:12.773909', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.11316443234682083, 'timestamp': '2025-09-30 22:23:12.782814', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:12.846975', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.128428116440773, 'timestamp': '2025-09-30 22:23:12.850254', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:12.912756', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.14180722832679749, 'timestamp': '2025-09-30 22:23:12.915532', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:12.977502', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.1616123467683792, 'timestamp': '2025-09-30 22:23:12.985707', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:13.045346', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.20616304874420166, 'timestamp': '2025-09-30 22:23:13.050497', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:13.117224', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.1657475084066391, 'timestamp': '2025-09-30 22:23:13.120815', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:13.189937', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.1436357945203781, 'timestamp': '2025-09-30 22:23:13.195285', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:13.283972', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.22034607827663422, 'timestamp': '2025-09-30 22:23:13.291029', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:13.361467', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.1689726859331131, 'timestamp': '2025-09-30 22:23:13.365091', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:13.423973', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.09469582885503769, 'timestamp': '2025-09-30 22:23:13.427087', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:13.486652', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.17062845826148987, 'timestamp': '2025-09-30 22:23:13.490107', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:13.549191', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.21843494474887848, 'timestamp': '2025-09-30 22:23:13.557701', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:13.622070', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.12318788468837738, 'timestamp': '2025-09-30 22:23:13.639007', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:13.710936', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.11769627034664154, 'timestamp': '2025-09-30 22:23:13.715173', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:13.790844', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.15485186874866486, 'timestamp': '2025-09-30 22:23:13.807796', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:13.874779', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.09695363789796829, 'timestamp': '2025-09-30 22:23:13.881562', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:13.947515', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.11101100593805313, 'timestamp': '2025-09-30 22:23:13.950479', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:14.017600', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.1789354383945465, 'timestamp': '2025-09-30 22:23:14.021709', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:14.098000', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.1844528466463089, 'timestamp': '2025-09-30 22:23:14.109240', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:14.171794', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.1640823930501938, 'timestamp': '2025-09-30 22:23:14.178523', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:14.258428', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.13159368932247162, 'timestamp': '2025-09-30 22:23:14.261062', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:14.320750', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.12162979692220688, 'timestamp': '2025-09-30 22:23:14.323858', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:14.382613', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.2089373916387558, 'timestamp': '2025-09-30 22:23:14.390712', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:14.451904', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.11447358131408691, 'timestamp': '2025-09-30 22:23:14.460405', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:14.525252', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.06508571654558182, 'timestamp': '2025-09-30 22:23:14.529738', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:14.613818', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.15613971650600433, 'timestamp': '2025-09-30 22:23:14.617232', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:14.681377', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.10198503732681274, 'timestamp': '2025-09-30 22:23:14.683931', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:14.773133', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.12752188742160797, 'timestamp': '2025-09-30 22:23:14.784603', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:14.843736', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.2055177390575409, 'timestamp': '2025-09-30 22:23:14.846207', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:14.905234', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.186528280377388, 'timestamp': '2025-09-30 22:23:14.908000', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:14.971011', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.11841391026973724, 'timestamp': '2025-09-30 22:23:14.974739', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.055671', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.13050661981105804, 'timestamp': '2025-09-30 22:23:15.061984', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:15.133744', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.15566661953926086, 'timestamp': '2025-09-30 22:23:15.136582', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.215041', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.08788054436445236, 'timestamp': '2025-09-30 22:23:15.219609', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:15.277982', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.1326477825641632, 'timestamp': '2025-09-30 22:23:15.280767', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:15.338280', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.2502173185348511, 'timestamp': '2025-09-30 22:23:15.347325', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:15.407171', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.11636513471603394, 'timestamp': '2025-09-30 22:23:15.411379', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.469830', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.12381783127784729, 'timestamp': '2025-09-30 22:23:15.473477', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.539222', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.14542952179908752, 'timestamp': '2025-09-30 22:23:15.544631', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.615473', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.15533477067947388, 'timestamp': '2025-09-30 22:23:15.628832', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:15.685520', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.13480965793132782, 'timestamp': '2025-09-30 22:23:15.688878', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.760962', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.15631231665611267, 'timestamp': '2025-09-30 22:23:15.763882', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:15.844849', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.12363207340240479, 'timestamp': '2025-09-30 22:23:15.847749', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:15.907600', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.1299505978822708, 'timestamp': '2025-09-30 22:23:15.914043', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:15.972135', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.10793521255254745, 'timestamp': '2025-09-30 22:23:15.974759', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:16.043972', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.11702824383974075, 'timestamp': '2025-09-30 22:23:16.047903', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.107063', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.1698985993862152, 'timestamp': '2025-09-30 22:23:16.111686', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:16.176402', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.20734372735023499, 'timestamp': '2025-09-30 22:23:16.184655', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.251999', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.08371295034885406, 'timestamp': '2025-09-30 22:23:16.254920', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.326979', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.26549267768859863, 'timestamp': '2025-09-30 22:23:16.330341', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.390832', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.09781087934970856, 'timestamp': '2025-09-30 22:23:16.394540', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.468415', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.11656130850315094, 'timestamp': '2025-09-30 22:23:16.479561', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.553118', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.1441856324672699, 'timestamp': '2025-09-30 22:23:16.556605', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:16.621863', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.1080910786986351, 'timestamp': '2025-09-30 22:23:16.625453', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:16.695619', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.11454904824495316, 'timestamp': '2025-09-30 22:23:16.701009', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:16.760675', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.17678327858448029, 'timestamp': '2025-09-30 22:23:16.770242', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:16.831626', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.14294716715812683, 'timestamp': '2025-09-30 22:23:16.837926', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:16.915124', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.1603359431028366, 'timestamp': '2025-09-30 22:23:16.918467', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:17.007280', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.12612661719322205, 'timestamp': '2025-09-30 22:23:17.010742', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.071814', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.16545698046684265, 'timestamp': '2025-09-30 22:23:17.084878', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:17.161538', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.17140530049800873, 'timestamp': '2025-09-30 22:23:17.166069', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:17.226214', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.12744057178497314, 'timestamp': '2025-09-30 22:23:17.238108', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:17.310901', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.2010435312986374, 'timestamp': '2025-09-30 22:23:17.317907', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.416846', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.18005715310573578, 'timestamp': '2025-09-30 22:23:17.425640', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.487123', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.1551010012626648, 'timestamp': '2025-09-30 22:23:17.490512', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.567257', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.1450870931148529, 'timestamp': '2025-09-30 22:23:17.571030', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:17.635180', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.16755163669586182, 'timestamp': '2025-09-30 22:23:17.650228', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:17.727646', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.13885599374771118, 'timestamp': '2025-09-30 22:23:17.734724', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:17.793978', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.11573798209428787, 'timestamp': '2025-09-30 22:23:17.797593', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.855968', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.22132782638072968, 'timestamp': '2025-09-30 22:23:17.858747', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:17.915997', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10031898319721222, 'timestamp': '2025-09-30 22:23:17.918863', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:17.977365', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.09846001118421555, 'timestamp': '2025-09-30 22:23:17.984060', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:18.041385', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.1541808545589447, 'timestamp': '2025-09-30 22:23:18.043850', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:18.108726', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.2530232071876526, 'timestamp': '2025-09-30 22:23:18.110677', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:18.170643', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.09100572764873505, 'timestamp': '2025-09-30 22:23:18.173890', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:18.230820', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.2195747047662735, 'timestamp': '2025-09-30 22:23:18.237075', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:18.328946', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.17202860116958618, 'timestamp': '2025-09-30 22:23:18.332831', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:18.405439', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.18877258896827698, 'timestamp': '2025-09-30 22:23:18.413107', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:18.490863', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.102268747985363, 'timestamp': '2025-09-30 22:23:18.496548', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:18.562720', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.2403026521205902, 'timestamp': '2025-09-30 22:23:18.569512', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:18.628466', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.1733880341053009, 'timestamp': '2025-09-30 22:23:18.635080', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:18.724130', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.13973020017147064, 'timestamp': '2025-09-30 22:23:18.731164', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:18.790759', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.24024200439453125, 'timestamp': '2025-09-30 22:23:18.794513', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:18.855598', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.13272328674793243, 'timestamp': '2025-09-30 22:23:18.862643', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:18.929067', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.1281239092350006, 'timestamp': '2025-09-30 22:23:18.932486', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:18.990271', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.10120086371898651, 'timestamp': '2025-09-30 22:23:18.993632', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:19.057867', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.146034374833107, 'timestamp': '2025-09-30 22:23:19.060751', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.122410', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.29063230752944946, 'timestamp': '2025-09-30 22:23:19.129049', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:19.200374', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.1846098005771637, 'timestamp': '2025-09-30 22:23:19.203423', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.278221', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.1548607349395752, 'timestamp': '2025-09-30 22:23:19.282061', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:19.344691', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.10902541130781174, 'timestamp': '2025-09-30 22:23:19.347774', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:19.404465', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.15755595266819, 'timestamp': '2025-09-30 22:23:19.410762', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:19.480079', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.18868322670459747, 'timestamp': '2025-09-30 22:23:19.482854', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.541967', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.24006757140159607, 'timestamp': '2025-09-30 22:23:19.548759', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.617102', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.134843111038208, 'timestamp': '2025-09-30 22:23:19.621906', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:19.684109', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.14689387381076813, 'timestamp': '2025-09-30 22:23:19.692377', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:19.757156', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.16510289907455444, 'timestamp': '2025-09-30 22:23:19.760560', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.832250', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.1489700824022293, 'timestamp': '2025-09-30 22:23:19.835751', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:19.899352', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.1631377637386322, 'timestamp': '2025-09-30 22:23:19.902312', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:19.963229', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.21649682521820068, 'timestamp': '2025-09-30 22:23:19.969918', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:20.039133', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.16481079161167145, 'timestamp': '2025-09-30 22:23:20.041720', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:20.116424', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.1661583036184311, 'timestamp': '2025-09-30 22:23:20.127204', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:20.187333', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.19493620097637177, 'timestamp': '2025-09-30 22:23:20.190660', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:20.255141', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.14979781210422516, 'timestamp': '2025-09-30 22:23:20.262126', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:20.324458', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.14055849611759186, 'timestamp': '2025-09-30 22:23:20.327195', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:20.384753', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.20274892449378967, 'timestamp': '2025-09-30 22:23:20.388180', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:20.447150', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.14454391598701477, 'timestamp': '2025-09-30 22:23:20.450384', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:20.510775', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.14486198127269745, 'timestamp': '2025-09-30 22:23:20.532322', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:20.604867', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.0852208137512207, 'timestamp': '2025-09-30 22:23:20.609245', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:20.673140', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.1447477638721466, 'timestamp': '2025-09-30 22:23:20.681932', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:20.760505', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.10129764676094055, 'timestamp': '2025-09-30 22:23:20.772745', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:20.832077', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.0878700464963913, 'timestamp': '2025-09-30 22:23:20.839683', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:20.897796', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.12827856838703156, 'timestamp': '2025-09-30 22:23:20.918808', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:20.994078', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.18040746450424194, 'timestamp': '2025-09-30 22:23:20.997656', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:23:21.058333', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.29099199175834656, 'timestamp': '2025-09-30 22:23:21.061188', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:21.122128', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.17227883636951447, 'timestamp': '2025-09-30 22:23:21.129486', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:21.188936', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.09926923364400864, 'timestamp': '2025-09-30 22:23:21.191727', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:21.261256', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.1757131814956665, 'timestamp': '2025-09-30 22:23:21.264098', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:21.329315', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.12030678242444992, 'timestamp': '2025-09-30 22:23:21.336038', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:21.412882', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.1414748877286911, 'timestamp': '2025-09-30 22:23:21.419321', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:21.478269', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.18542970716953278, 'timestamp': '2025-09-30 22:23:21.489269', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:21.554551', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.22181154787540436, 'timestamp': '2025-09-30 22:23:21.557277', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:21.618919', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.12746906280517578, 'timestamp': '2025-09-30 22:23:21.632070', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:21.691130', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.12315840274095535, 'timestamp': '2025-09-30 22:23:21.707149', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:21.774795', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.20430831611156464, 'timestamp': '2025-09-30 22:23:21.778192', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:21.842466', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.11840972304344177, 'timestamp': '2025-09-30 22:23:21.845898', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:21.906695', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.0807230994105339, 'timestamp': '2025-09-30 22:23:21.910320', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:21.969816', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.12774015963077545, 'timestamp': '2025-09-30 22:23:21.976315', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:22.033225', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.08650962263345718, 'timestamp': '2025-09-30 22:23:22.037091', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:22.097244', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.12512323260307312, 'timestamp': '2025-09-30 22:23:22.101191', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.161073', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.1024608463048935, 'timestamp': '2025-09-30 22:23:22.164324', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.223152', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.19016091525554657, 'timestamp': '2025-09-30 22:23:22.230982', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.294132', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.23657077550888062, 'timestamp': '2025-09-30 22:23:22.298117', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.358632', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.17426693439483643, 'timestamp': '2025-09-30 22:23:22.361877', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:22.438748', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.17773693799972534, 'timestamp': '2025-09-30 22:23:22.443320', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:22.535122', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.2670433521270752, 'timestamp': '2025-09-30 22:23:22.546770', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:22.605261', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.19906985759735107, 'timestamp': '2025-09-30 22:23:22.618368', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.678366', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.07852360606193542, 'timestamp': '2025-09-30 22:23:22.681987', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:22.743819', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.22269077599048615, 'timestamp': '2025-09-30 22:23:22.748638', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:22.808345', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.12145579606294632, 'timestamp': '2025-09-30 22:23:22.816326', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:22.873755', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.1528620719909668, 'timestamp': '2025-09-30 22:23:22.886324', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:22.943176', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.11621782183647156, 'timestamp': '2025-09-30 22:23:22.948425', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:23.007359', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.1819687783718109, 'timestamp': '2025-09-30 22:23:23.011121', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:23.078699', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.11128775030374527, 'timestamp': '2025-09-30 22:23:23.087244', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:23.145211', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.1543533205986023, 'timestamp': '2025-09-30 22:23:23.148632', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:23.210266', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.10624351352453232, 'timestamp': '2025-09-30 22:23:23.214941', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:23.294661', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.1232193186879158, 'timestamp': '2025-09-30 22:23:23.298862', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:23.364232', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.14605046808719635, 'timestamp': '2025-09-30 22:23:23.374852', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:23:23.449356', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.1876743584871292, 'timestamp': '2025-09-30 22:23:23.453942', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:23.519829', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.22652731835842133, 'timestamp': '2025-09-30 22:23:23.529458', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:23.592498', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.08359241485595703, 'timestamp': '2025-09-30 22:23:23.596546', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:23.656518', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.2628289461135864, 'timestamp': '2025-09-30 22:23:23.663867', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:23.723147', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.17185506224632263, 'timestamp': '2025-09-30 22:23:23.733563', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:23.795037', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.18751133978366852, 'timestamp': '2025-09-30 22:23:23.798235', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:23.870905', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.157261922955513, 'timestamp': '2025-09-30 22:23:23.874292', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:23.932705', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.13371394574642181, 'timestamp': '2025-09-30 22:23:23.939152', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:24.009156', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.15947377681732178, 'timestamp': '2025-09-30 22:23:24.013083', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.073195', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.19716505706310272, 'timestamp': '2025-09-30 22:23:24.082473', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:24.145773', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.0806773453950882, 'timestamp': '2025-09-30 22:23:24.148855', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:24.207968', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.1315312683582306, 'timestamp': '2025-09-30 22:23:24.223873', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:24.285083', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.23278945684432983, 'timestamp': '2025-09-30 22:23:24.288224', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:24.349181', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.09493556618690491, 'timestamp': '2025-09-30 22:23:24.353677', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:24.422423', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.11674701422452927, 'timestamp': '2025-09-30 22:23:24.428550', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:24.490152', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.17377020418643951, 'timestamp': '2025-09-30 22:23:24.496973', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.557017', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.22130176424980164, 'timestamp': '2025-09-30 22:23:24.559985', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.617989', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.11504283547401428, 'timestamp': '2025-09-30 22:23:24.620929', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:24.694697', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.18962909281253815, 'timestamp': '2025-09-30 22:23:24.698624', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.771624', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.14899688959121704, 'timestamp': '2025-09-30 22:23:24.778510', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.835776', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.17179258167743683, 'timestamp': '2025-09-30 22:23:24.839365', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.897113', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.3114051818847656, 'timestamp': '2025-09-30 22:23:24.900148', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:24.956224', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.13206015527248383, 'timestamp': '2025-09-30 22:23:24.964316', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.021305', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.16207024455070496, 'timestamp': '2025-09-30 22:23:25.027570', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.084684', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.137207493185997, 'timestamp': '2025-09-30 22:23:25.088447', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.145666', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.24182510375976562, 'timestamp': '2025-09-30 22:23:25.148950', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.222553', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.13149107992649078, 'timestamp': '2025-09-30 22:23:25.225141', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.292369', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.23563353717327118, 'timestamp': '2025-09-30 22:23:25.298631', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.356862', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.16824081540107727, 'timestamp': '2025-09-30 22:23:25.371056', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.428717', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.1628042459487915, 'timestamp': '2025-09-30 22:23:25.431120', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.494253', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.1829639971256256, 'timestamp': '2025-09-30 22:23:25.497092', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.559353', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.1380986124277115, 'timestamp': '2025-09-30 22:23:25.566577', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.636401', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.16985879838466644, 'timestamp': '2025-09-30 22:23:25.644012', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.702379', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.08279677480459213, 'timestamp': '2025-09-30 22:23:25.711791', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.772128', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.2061631977558136, 'timestamp': '2025-09-30 22:23:25.775779', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:25.834303', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.0853748768568039, 'timestamp': '2025-09-30 22:23:25.841192', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.910352', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.11001540720462799, 'timestamp': '2025-09-30 22:23:25.914265', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:25.973828', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.15000846982002258, 'timestamp': '2025-09-30 22:23:25.977155', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:26.059573', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.17390769720077515, 'timestamp': '2025-09-30 22:23:26.063421', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:26.124261', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.13868646323680878, 'timestamp': '2025-09-30 22:23:26.131669', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.200012', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.17321772873401642, 'timestamp': '2025-09-30 22:23:26.211115', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:26.293922', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.10906685888767242, 'timestamp': '2025-09-30 22:23:26.297034', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.354570', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.11577421426773071, 'timestamp': '2025-09-30 22:23:26.358336', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.417955', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.22948648035526276, 'timestamp': '2025-09-30 22:23:26.424816', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:26.482050', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.16533757746219635, 'timestamp': '2025-09-30 22:23:26.485500', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:26.546386', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.13632932305335999, 'timestamp': '2025-09-30 22:23:26.555062', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:26.622194', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.22439584136009216, 'timestamp': '2025-09-30 22:23:26.632598', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:26.689959', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.08596044033765793, 'timestamp': '2025-09-30 22:23:26.696465', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.755067', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.18708918988704681, 'timestamp': '2025-09-30 22:23:26.759962', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.827437', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.2566685974597931, 'timestamp': '2025-09-30 22:23:26.831205', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:26.889413', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.1552678346633911, 'timestamp': '2025-09-30 22:23:26.892348', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:26.960408', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.0868259146809578, 'timestamp': '2025-09-30 22:23:26.968286', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:27.031166', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.20234562456607819, 'timestamp': '2025-09-30 22:23:27.033699', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.102824', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.12143391370773315, 'timestamp': '2025-09-30 22:23:27.105648', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:27.168965', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.2980448603630066, 'timestamp': '2025-09-30 22:23:27.179498', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:27.239652', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.17444632947444916, 'timestamp': '2025-09-30 22:23:27.246349', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:27.308927', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.20776140689849854, 'timestamp': '2025-09-30 22:23:27.312396', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.373732', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.21241495013237, 'timestamp': '2025-09-30 22:23:27.377081', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.434988', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.0838237851858139, 'timestamp': '2025-09-30 22:23:27.438727', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.496362', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.10171625018119812, 'timestamp': '2025-09-30 22:23:27.503391', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:27.577432', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.15352748334407806, 'timestamp': '2025-09-30 22:23:27.581218', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:27.640533', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.16110225021839142, 'timestamp': '2025-09-30 22:23:27.644011', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:27.701521', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.11587328463792801, 'timestamp': '2025-09-30 22:23:27.705586', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.763296', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.1823420524597168, 'timestamp': '2025-09-30 22:23:27.770218', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.828124', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.21035374701023102, 'timestamp': '2025-09-30 22:23:27.831799', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:27.891430', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.10449489206075668, 'timestamp': '2025-09-30 22:23:27.895189', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:27.953205', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.10945877432823181, 'timestamp': '2025-09-30 22:23:27.960163', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.026399', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.08206431567668915, 'timestamp': '2025-09-30 22:23:28.033595', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:28.090171', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.18992532789707184, 'timestamp': '2025-09-30 22:23:28.093061', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.158630', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.12063933908939362, 'timestamp': '2025-09-30 22:23:28.162892', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:28.228118', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.10845436155796051, 'timestamp': '2025-09-30 22:23:28.230766', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.290654', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.21153920888900757, 'timestamp': '2025-09-30 22:23:28.298220', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.361132', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.12786079943180084, 'timestamp': '2025-09-30 22:23:28.363897', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:28.463055', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.09475097060203552, 'timestamp': '2025-09-30 22:23:28.465948', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.536708', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.14691878855228424, 'timestamp': '2025-09-30 22:23:28.539485', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:23:28.598961', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.18170645833015442, 'timestamp': '2025-09-30 22:23:28.605514', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.670317', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.16806283593177795, 'timestamp': '2025-09-30 22:23:28.673542', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:28.733228', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.09231483191251755, 'timestamp': '2025-09-30 22:23:28.737364', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.798741', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.09640492498874664, 'timestamp': '2025-09-30 22:23:28.811867', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:28.897393', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.19699285924434662, 'timestamp': '2025-09-30 22:23:28.904125', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:28.962329', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.12887363135814667, 'timestamp': '2025-09-30 22:23:28.967289', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:29.026389', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.08641684800386429, 'timestamp': '2025-09-30 22:23:29.029163', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:29.089449', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.22644923627376556, 'timestamp': '2025-09-30 22:23:29.092402', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:29.164173', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.20304466784000397, 'timestamp': '2025-09-30 22:23:29.170402', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:29.227769', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.09533578902482986, 'timestamp': '2025-09-30 22:23:29.230907', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:29.297763', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.1277497261762619, 'timestamp': '2025-09-30 22:23:29.301024', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:29.370255', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.120958611369133, 'timestamp': '2025-09-30 22:23:29.382383', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:29.443414', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.07511913776397705, 'timestamp': '2025-09-30 22:23:29.450121', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:29.510766', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.1274671107530594, 'timestamp': '2025-09-30 22:23:29.513741', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:29.573821', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.16899888217449188, 'timestamp': '2025-09-30 22:23:29.576557', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:29.647615', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.1478462666273117, 'timestamp': '2025-09-30 22:23:29.650403', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:29.709290', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.18910670280456543, 'timestamp': '2025-09-30 22:23:29.716354', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:29.774297', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.06846418976783752, 'timestamp': '2025-09-30 22:23:29.777583', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:29.848249', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.1211840882897377, 'timestamp': '2025-09-30 22:23:29.851010', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:29.907434', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.10013493150472641, 'timestamp': '2025-09-30 22:23:29.910319', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:29.982238', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.08064582943916321, 'timestamp': '2025-09-30 22:23:29.994637', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:30.052229', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.15098552405834198, 'timestamp': '2025-09-30 22:23:30.057517', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:30.116348', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.21524189412593842, 'timestamp': '2025-09-30 22:23:30.119395', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:30.177355', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.22337281703948975, 'timestamp': '2025-09-30 22:23:30.179910', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:30.254814', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.27635082602500916, 'timestamp': '2025-09-30 22:23:30.260984', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:30.318950', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.19540676474571228, 'timestamp': '2025-09-30 22:23:30.322051', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:30.381512', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.1434040665626526, 'timestamp': '2025-09-30 22:23:30.384501', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:30.449243', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.09537605196237564, 'timestamp': '2025-09-30 22:23:30.452760', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:30.510582', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.2213519960641861, 'timestamp': '2025-09-30 22:23:30.517271', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:30.575409', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.2475227564573288, 'timestamp': '2025-09-30 22:23:30.579362', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:30.654623', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.1980924755334854, 'timestamp': '2025-09-30 22:23:30.657910', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:30.716732', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.13916440308094025, 'timestamp': '2025-09-30 22:23:30.720789', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:30.783571', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.10427035391330719, 'timestamp': '2025-09-30 22:23:30.793595', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:30.858424', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.1421305388212204, 'timestamp': '2025-09-30 22:23:30.864898', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:30.923378', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.12651458382606506, 'timestamp': '2025-09-30 22:23:30.930634', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:30.988390', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.1617441624403, 'timestamp': '2025-09-30 22:23:30.996254', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:31.056639', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.15529850125312805, 'timestamp': '2025-09-30 22:23:31.067213', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:31.139656', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.2079007774591446, 'timestamp': '2025-09-30 22:23:31.143210', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:31.203128', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.17911818623542786, 'timestamp': '2025-09-30 22:23:31.205919', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:31.269719', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.13279804587364197, 'timestamp': '2025-09-30 22:23:31.272533', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:31.337843', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.1406935304403305, 'timestamp': '2025-09-30 22:23:31.352317', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:23:31.423468', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.14762820303440094, 'timestamp': '2025-09-30 22:23:31.436869', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:31.494375', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.21167610585689545, 'timestamp': '2025-09-30 22:23:31.497910', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:31.558345', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.1787322461605072, 'timestamp': '2025-09-30 22:23:31.561935', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:31.637018', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.10631535202264786, 'timestamp': '2025-09-30 22:23:31.643299', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:31.710591', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.13373693823814392, 'timestamp': '2025-09-30 22:23:31.714179', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:31.778785', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.1744837611913681, 'timestamp': '2025-09-30 22:23:31.782594', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:31.843421', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.16778135299682617, 'timestamp': '2025-09-30 22:23:31.846474', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:31.923841', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.12359705567359924, 'timestamp': '2025-09-30 22:23:31.931076', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:31.989938', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.07029398530721664, 'timestamp': '2025-09-30 22:23:31.992383', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:32.050413', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.14007167518138885, 'timestamp': '2025-09-30 22:23:32.053629', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.129625', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.11589126288890839, 'timestamp': '2025-09-30 22:23:32.138874', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.202945', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.09568511694669724, 'timestamp': '2025-09-30 22:23:32.210025', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:32.268543', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.11303714662790298, 'timestamp': '2025-09-30 22:23:32.271180', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:32.329804', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.09741292893886566, 'timestamp': '2025-09-30 22:23:32.332496', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:32.393722', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.17986035346984863, 'timestamp': '2025-09-30 22:23:32.397860', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:32.459539', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.1811787486076355, 'timestamp': '2025-09-30 22:23:32.466061', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:32.532082', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.2191431075334549, 'timestamp': '2025-09-30 22:23:32.535586', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:32.595505', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.13743214309215546, 'timestamp': '2025-09-30 22:23:32.600736', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.668354', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.1067267432808876, 'timestamp': '2025-09-30 22:23:32.672346', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:32.734482', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.10510945320129395, 'timestamp': '2025-09-30 22:23:32.741701', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:32.806390', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.10994051396846771, 'timestamp': '2025-09-30 22:23:32.810237', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.868232', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.19774509966373444, 'timestamp': '2025-09-30 22:23:32.871626', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.930635', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.12243769317865372, 'timestamp': '2025-09-30 22:23:32.933396', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:32.992947', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.17192716896533966, 'timestamp': '2025-09-30 22:23:33.005352', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:33.082568', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.17537032067775726, 'timestamp': '2025-09-30 22:23:33.085747', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:33.153241', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.09728892147541046, 'timestamp': '2025-09-30 22:23:33.156600', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:33.214539', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.16864988207817078, 'timestamp': '2025-09-30 22:23:33.218569', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:33.280232', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.20019422471523285, 'timestamp': '2025-09-30 22:23:33.286889', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:33.346143', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.09990452229976654, 'timestamp': '2025-09-30 22:23:33.351482', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:33.410438', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.1654098778963089, 'timestamp': '2025-09-30 22:23:33.413921', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:33.473721', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.08679109811782837, 'timestamp': '2025-09-30 22:23:33.476819', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:33.536012', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.14941492676734924, 'timestamp': '2025-09-30 22:23:33.542901', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:33.610361', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.17129412293434143, 'timestamp': '2025-09-30 22:23:33.613328', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:33.670901', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.12231957912445068, 'timestamp': '2025-09-30 22:23:33.675055', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:33.738786', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.16486646234989166, 'timestamp': '2025-09-30 22:23:33.742225', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:33.799908', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.14847144484519958, 'timestamp': '2025-09-30 22:23:33.806183', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:33.864181', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.14237220585346222, 'timestamp': '2025-09-30 22:23:33.867553', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:33.925079', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.14224275946617126, 'timestamp': '2025-09-30 22:23:33.927804', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:33.990957', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.1379231959581375, 'timestamp': '2025-09-30 22:23:33.993651', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:34.051753', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.12363272905349731, 'timestamp': '2025-09-30 22:23:34.059310', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:34.133692', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.19483642280101776, 'timestamp': '2025-09-30 22:23:34.138073', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:34.198509', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.23621883988380432, 'timestamp': '2025-09-30 22:23:34.202867', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:34.271570', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.23407913744449615, 'timestamp': '2025-09-30 22:23:34.274502', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:34.332696', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.10658172518014908, 'timestamp': '2025-09-30 22:23:34.340604', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:34.397224', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.15862824022769928, 'timestamp': '2025-09-30 22:23:34.410498', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:34.468219', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.0931200236082077, 'timestamp': '2025-09-30 22:23:34.475538', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:34.534408', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.15179386734962463, 'timestamp': '2025-09-30 22:23:34.542643', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:34.601442', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.14683891832828522, 'timestamp': '2025-09-30 22:23:34.617465', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:34.676688', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.08466697484254837, 'timestamp': '2025-09-30 22:23:34.681808', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:34.741889', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.0861668735742569, 'timestamp': '2025-09-30 22:23:34.745602', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:34.808924', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.0682750791311264, 'timestamp': '2025-09-30 22:23:34.811893', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:34.874755', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.2864004075527191, 'timestamp': '2025-09-30 22:23:34.882739', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:34.939730', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.15245211124420166, 'timestamp': '2025-09-30 22:23:34.948450', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:35.016541', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.05984107777476311, 'timestamp': '2025-09-30 22:23:35.020033', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.078430', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.14999158680438995, 'timestamp': '2025-09-30 22:23:35.085505', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:35.157123', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.16566202044487, 'timestamp': '2025-09-30 22:23:35.162953', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.220309', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.10499756783246994, 'timestamp': '2025-09-30 22:23:35.223645', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:35.280760', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.13026964664459229, 'timestamp': '2025-09-30 22:23:35.285349', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:35.343101', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.06955286860466003, 'timestamp': '2025-09-30 22:23:35.346256', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.405808', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.11194976419210434, 'timestamp': '2025-09-30 22:23:35.413473', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:35.471787', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.21218988299369812, 'timestamp': '2025-09-30 22:23:35.478491', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:35.538054', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.1296873539686203, 'timestamp': '2025-09-30 22:23:35.543891', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:35.605318', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.28756117820739746, 'timestamp': '2025-09-30 22:23:35.608442', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:35.671607', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.10623134672641754, 'timestamp': '2025-09-30 22:23:35.678608', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.740130', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.1702122837305069, 'timestamp': '2025-09-30 22:23:35.746802', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:35.815314', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.07707162201404572, 'timestamp': '2025-09-30 22:23:35.818439', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.883972', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.1560833901166916, 'timestamp': '2025-09-30 22:23:35.886945', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:35.945124', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.2145458608865738, 'timestamp': '2025-09-30 22:23:35.951768', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:23:36.011950', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.10945769399404526, 'timestamp': '2025-09-30 22:23:36.018014', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.081861', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.14653408527374268, 'timestamp': '2025-09-30 22:23:36.084974', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.144671', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.1300702691078186, 'timestamp': '2025-09-30 22:23:36.147345', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.206234', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.1085548996925354, 'timestamp': '2025-09-30 22:23:36.213062', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:36.279454', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.1868685930967331, 'timestamp': '2025-09-30 22:23:36.283283', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:36.345925', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.1093355119228363, 'timestamp': '2025-09-30 22:23:36.350638', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:36.416794', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.14354394376277924, 'timestamp': '2025-09-30 22:23:36.421191', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:36.483066', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.24943692982196808, 'timestamp': '2025-09-30 22:23:36.493164', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.549336', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.16436903178691864, 'timestamp': '2025-09-30 22:23:36.552941', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:36.617414', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.2650718092918396, 'timestamp': '2025-09-30 22:23:36.624857', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:36.703197', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.0789032131433487, 'timestamp': '2025-09-30 22:23:36.712643', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.778276', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.1486840397119522, 'timestamp': '2025-09-30 22:23:36.785230', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:36.863298', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.15402346849441528, 'timestamp': '2025-09-30 22:23:36.866886', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:36.928225', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.1043984591960907, 'timestamp': '2025-09-30 22:23:36.930679', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:36.995589', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.06309335678815842, 'timestamp': '2025-09-30 22:23:37.002133', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:37.075139', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.20559895038604736, 'timestamp': '2025-09-30 22:23:37.093911', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:37.152798', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.17474006116390228, 'timestamp': '2025-09-30 22:23:37.155631', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:37.232755', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.1281183511018753, 'timestamp': '2025-09-30 22:23:37.239600', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:37.299795', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.16915962100028992, 'timestamp': '2025-09-30 22:23:37.303908', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:37.364074', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.1656850129365921, 'timestamp': '2025-09-30 22:23:37.370349', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:37.430742', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.13168865442276, 'timestamp': '2025-09-30 22:23:37.442753', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:37.500164', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.10394565016031265, 'timestamp': '2025-09-30 22:23:37.502763', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:37.564460', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21406340599060059, 'timestamp': '2025-09-30 22:23:37.572288', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:37.632855', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.10837310552597046, 'timestamp': '2025-09-30 22:23:37.639497', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:37.701936', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.10044775158166885, 'timestamp': '2025-09-30 22:23:37.705059', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:37.768885', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.2670462429523468, 'timestamp': '2025-09-30 22:23:37.771564', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:37.833699', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.15422381460666656, 'timestamp': '2025-09-30 22:23:37.836414', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:37.895067', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12652382254600525, 'timestamp': '2025-09-30 22:23:37.900847', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:37.962400', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.1868048459291458, 'timestamp': '2025-09-30 22:23:37.965258', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:38.034626', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.12765271961688995, 'timestamp': '2025-09-30 22:23:38.037976', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:38.105091', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.17048320174217224, 'timestamp': '2025-09-30 22:23:38.109921', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.169396', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.10083983093500137, 'timestamp': '2025-09-30 22:23:38.183143', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.240386', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17325372993946075, 'timestamp': '2025-09-30 22:23:38.243256', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:38.302259', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.10735700279474258, 'timestamp': '2025-09-30 22:23:38.305044', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.364211', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.07861793786287308, 'timestamp': '2025-09-30 22:23:38.367853', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:38.450582', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.12169961631298065, 'timestamp': '2025-09-30 22:23:38.464354', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:38.523181', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.14472809433937073, 'timestamp': '2025-09-30 22:23:38.525646', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.583647', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.15844424068927765, 'timestamp': '2025-09-30 22:23:38.589842', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.651960', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.22781239449977875, 'timestamp': '2025-09-30 22:23:38.655542', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:38.720292', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.148966982960701, 'timestamp': '2025-09-30 22:23:38.727321', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.789370', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.12649773061275482, 'timestamp': '2025-09-30 22:23:38.793299', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:38.857884', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.10177230834960938, 'timestamp': '2025-09-30 22:23:38.861482', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:38.921823', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.10060276836156845, 'timestamp': '2025-09-30 22:23:38.924372', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:38.982361', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.09294788539409637, 'timestamp': '2025-09-30 22:23:38.989103', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:39.059093', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.08069058507680893, 'timestamp': '2025-09-30 22:23:39.061748', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:39.118845', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.1315004527568817, 'timestamp': '2025-09-30 22:23:39.125141', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:39.187664', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.09137814491987228, 'timestamp': '2025-09-30 22:23:39.190381', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:39.257432', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.1450047641992569, 'timestamp': '2025-09-30 22:23:39.269798', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:39.327980', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.10991812497377396, 'timestamp': '2025-09-30 22:23:39.335310', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:39.400070', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.1874198168516159, 'timestamp': '2025-09-30 22:23:39.404043', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:39.473334', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.13339217007160187, 'timestamp': '2025-09-30 22:23:39.477564', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:39.537076', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.12486951053142548, 'timestamp': '2025-09-30 22:23:39.543004', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:39.601847', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.10197976976633072, 'timestamp': '2025-09-30 22:23:39.607158', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:39.670925', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.15039195120334625, 'timestamp': '2025-09-30 22:23:39.678481', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:39.749827', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.2368413209915161, 'timestamp': '2025-09-30 22:23:39.752347', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:39.812124', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.21365030109882355, 'timestamp': '2025-09-30 22:23:39.824432', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:39.891309', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.12004641443490982, 'timestamp': '2025-09-30 22:23:39.895614', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:39.956369', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.047915127128362656, 'timestamp': '2025-09-30 22:23:39.961747', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:40.026714', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.14210562407970428, 'timestamp': '2025-09-30 22:23:40.038923', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:40.099116', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.12810547649860382, 'timestamp': '2025-09-30 22:23:40.105876', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:40.172430', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.10881435871124268, 'timestamp': '2025-09-30 22:23:40.181153', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:40.262939', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.07696235924959183, 'timestamp': '2025-09-30 22:23:40.265628', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:40.327184', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.24519871175289154, 'timestamp': '2025-09-30 22:23:40.330469', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:40.396727', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.13282528519630432, 'timestamp': '2025-09-30 22:23:40.402960', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:40.465741', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.20642589032649994, 'timestamp': '2025-09-30 22:23:40.469808', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:40.531727', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.1512409746646881, 'timestamp': '2025-09-30 22:23:40.548836', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:40.617518', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.09038791805505753, 'timestamp': '2025-09-30 22:23:40.621520', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:40.695196', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.23231737315654755, 'timestamp': '2025-09-30 22:23:40.702072', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:40.769221', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.11366168409585953, 'timestamp': '2025-09-30 22:23:40.786969', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:40.844455', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.09424559026956558, 'timestamp': '2025-09-30 22:23:40.847064', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:40.905561', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.15897822380065918, 'timestamp': '2025-09-30 22:23:40.913986', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:40.971615', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.19936034083366394, 'timestamp': '2025-09-30 22:23:40.978989', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:41.036779', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.16923227906227112, 'timestamp': '2025-09-30 22:23:41.048498', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:41.108441', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.1507083922624588, 'timestamp': '2025-09-30 22:23:41.114228', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:41.171237', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.1716122180223465, 'timestamp': '2025-09-30 22:23:41.174351', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:41.233237', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.14267472922801971, 'timestamp': '2025-09-30 22:23:41.239186', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:41.296945', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.16378509998321533, 'timestamp': '2025-09-30 22:23:41.299944', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:41.362813', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.11077854037284851, 'timestamp': '2025-09-30 22:23:41.365770', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:41.435870', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.13666844367980957, 'timestamp': '2025-09-30 22:23:41.438938', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:41.498796', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.12979307770729065, 'timestamp': '2025-09-30 22:23:41.509147', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:41.577869', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.19337661564350128, 'timestamp': '2025-09-30 22:23:41.585317', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:41.647201', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.17865021526813507, 'timestamp': '2025-09-30 22:23:41.655237', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:41.719653', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.1861341893672943, 'timestamp': '2025-09-30 22:23:41.722991', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:41.784928', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.13226357102394104, 'timestamp': '2025-09-30 22:23:41.794372', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:41.855852', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.161246657371521, 'timestamp': '2025-09-30 22:23:41.859334', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:41.928118', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.11523701250553131, 'timestamp': '2025-09-30 22:23:41.930802', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:41.988668', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.14465507864952087, 'timestamp': '2025-09-30 22:23:41.991924', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:42.058709', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.20738589763641357, 'timestamp': '2025-09-30 22:23:42.069587', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:42.128567', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.1346118450164795, 'timestamp': '2025-09-30 22:23:42.132035', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:42.189963', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.16448433697223663, 'timestamp': '2025-09-30 22:23:42.199826', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:23:58.010806', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 13293.538308574913, 'timestamp': '2025-09-30 22:23:58.015922', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:58.076644', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.11193543672561646, 'timestamp': '2025-09-30 22:23:58.086294', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.147012', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.1780756413936615, 'timestamp': '2025-09-30 22:23:58.154816', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:58.213221', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.10826565325260162, 'timestamp': '2025-09-30 22:23:58.218141', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.284611', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.12878699600696564, 'timestamp': '2025-09-30 22:23:58.287993', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.351789', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.1150154247879982, 'timestamp': '2025-09-30 22:23:58.354428', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.414078', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.14559577405452728, 'timestamp': '2025-09-30 22:23:58.423462', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.485453', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.16433127224445343, 'timestamp': '2025-09-30 22:23:58.489088', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.552986', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.07300247997045517, 'timestamp': '2025-09-30 22:23:58.565280', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:58.622709', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.2233460694551468, 'timestamp': '2025-09-30 22:23:58.626090', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.684782', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.16056068241596222, 'timestamp': '2025-09-30 22:23:58.692766', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:58.760064', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.12474626302719116, 'timestamp': '2025-09-30 22:23:58.770912', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:58.829410', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.06184843182563782, 'timestamp': '2025-09-30 22:23:58.831747', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:58.903124', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.2326304167509079, 'timestamp': '2025-09-30 22:23:58.905955', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:58.969274', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.08072131872177124, 'timestamp': '2025-09-30 22:23:58.975234', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:59.046246', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.13282738626003265, 'timestamp': '2025-09-30 22:23:59.049417', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:59.111779', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.09421534836292267, 'timestamp': '2025-09-30 22:23:59.119316', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:23:59.185319', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.10584570467472076, 'timestamp': '2025-09-30 22:23:59.192212', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:59.263609', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.2756468653678894, 'timestamp': '2025-09-30 22:23:59.270254', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:23:59.328500', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.16413052380084991, 'timestamp': '2025-09-30 22:23:59.331422', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:59.390448', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.1628834754228592, 'timestamp': '2025-09-30 22:23:59.393564', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:23:59.467649', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.11413439363241196, 'timestamp': '2025-09-30 22:23:59.471937', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:59.538211', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.20557431876659393, 'timestamp': '2025-09-30 22:23:59.545315', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:23:59.604100', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.10260070115327835, 'timestamp': '2025-09-30 22:23:59.610179', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:59.684822', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.10487714409828186, 'timestamp': '2025-09-30 22:23:59.687999', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:23:59.747440', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.060044195502996445, 'timestamp': '2025-09-30 22:23:59.750378', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:59.808112', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.1931896060705185, 'timestamp': '2025-09-30 22:23:59.817685', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:23:59.877265', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.1737643927335739, 'timestamp': '2025-09-30 22:23:59.879415', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:23:59.939876', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.1541236788034439, 'timestamp': '2025-09-30 22:23:59.943180', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:00.000931', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.18157215416431427, 'timestamp': '2025-09-30 22:24:00.010983', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:00.070918', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.12113793194293976, 'timestamp': '2025-09-30 22:24:00.077520', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:00.138792', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.10981982201337814, 'timestamp': '2025-09-30 22:24:00.150626', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:00.216039', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.1665634661912918, 'timestamp': '2025-09-30 22:24:00.218535', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:00.295260', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.17568475008010864, 'timestamp': '2025-09-30 22:24:00.297803', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:00.365451', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.20312555134296417, 'timestamp': '2025-09-30 22:24:00.375867', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:00.434604', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.1563325822353363, 'timestamp': '2025-09-30 22:24:00.437865', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:00.507115', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.16390158236026764, 'timestamp': '2025-09-30 22:24:00.514408', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:00.578284', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.1020098328590393, 'timestamp': '2025-09-30 22:24:00.586537', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:00.644724', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.08718506991863251, 'timestamp': '2025-09-30 22:24:00.653081', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:00.717522', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.06244344636797905, 'timestamp': '2025-09-30 22:24:00.719955', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:00.781557', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.16458719968795776, 'timestamp': '2025-09-30 22:24:00.784923', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:00.848958', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.0986940860748291, 'timestamp': '2025-09-30 22:24:00.851325', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:00.908847', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.09947998076677322, 'timestamp': '2025-09-30 22:24:00.915131', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:00.971622', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.09977386146783829, 'timestamp': '2025-09-30 22:24:00.974404', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:01.031418', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.1559540331363678, 'timestamp': '2025-09-30 22:24:01.037659', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.103617', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.2251356691122055, 'timestamp': '2025-09-30 22:24:01.106544', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.164750', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.08755600452423096, 'timestamp': '2025-09-30 22:24:01.170710', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:01.230299', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.14564882218837738, 'timestamp': '2025-09-30 22:24:01.233741', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.299695', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.09872086346149445, 'timestamp': '2025-09-30 22:24:01.302480', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:01.363292', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.06090519204735756, 'timestamp': '2025-09-30 22:24:01.365764', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.427210', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.10162748396396637, 'timestamp': '2025-09-30 22:24:01.434947', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.498484', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.2330022007226944, 'timestamp': '2025-09-30 22:24:01.501517', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:01.563275', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.13858354091644287, 'timestamp': '2025-09-30 22:24:01.565640', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:01.629415', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.11841299384832382, 'timestamp': '2025-09-30 22:24:01.634089', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:01.692695', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.13432778418064117, 'timestamp': '2025-09-30 22:24:01.701550', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.766176', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.16219112277030945, 'timestamp': '2025-09-30 22:24:01.768401', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:01.826392', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.09263872355222702, 'timestamp': '2025-09-30 22:24:01.829334', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:01.890349', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.17832981050014496, 'timestamp': '2025-09-30 22:24:01.895499', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:01.952376', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.0951329842209816, 'timestamp': '2025-09-30 22:24:01.958620', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:02.019375', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.09417697787284851, 'timestamp': '2025-09-30 22:24:02.025166', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:02.083188', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.2852150499820709, 'timestamp': '2025-09-30 22:24:02.088071', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:02.157654', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.13089917600154877, 'timestamp': '2025-09-30 22:24:02.159979', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:02.217893', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.0633418932557106, 'timestamp': '2025-09-30 22:24:02.223623', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:02.282673', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.0669470950961113, 'timestamp': '2025-09-30 22:24:02.287250', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:02.365737', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.18056748807430267, 'timestamp': '2025-09-30 22:24:02.369484', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:02.427842', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.1580592542886734, 'timestamp': '2025-09-30 22:24:02.431523', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:02.495115', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.13834479451179504, 'timestamp': '2025-09-30 22:24:02.501320', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:02.557359', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.08873298019170761, 'timestamp': '2025-09-30 22:24:02.559543', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:02.616388', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.15343517065048218, 'timestamp': '2025-09-30 22:24:02.618748', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:02.680124', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.12023241072893143, 'timestamp': '2025-09-30 22:24:02.682745', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:02.740277', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.06895187497138977, 'timestamp': '2025-09-30 22:24:02.746418', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:24:02.802975', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.12881216406822205, 'timestamp': '2025-09-30 22:24:02.805619', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:02.864036', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.12706375122070312, 'timestamp': '2025-09-30 22:24:02.866976', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:02.925612', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.2516186535358429, 'timestamp': '2025-09-30 22:24:02.928760', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:03.003545', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.16514137387275696, 'timestamp': '2025-09-30 22:24:03.010545', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.066944', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.0912046954035759, 'timestamp': '2025-09-30 22:24:03.069764', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.128134', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.07469232380390167, 'timestamp': '2025-09-30 22:24:03.131189', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.197461', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.2154567539691925, 'timestamp': '2025-09-30 22:24:03.200799', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:03.264951', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.1653255820274353, 'timestamp': '2025-09-30 22:24:03.278687', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-09-30 22:24:03.735528', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.797251', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.10990843176841736, 'timestamp': '2025-09-30 22:24:03.808749', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.870735', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.1346675455570221, 'timestamp': '2025-09-30 22:24:03.876531', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:03.938210', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.09126364439725876, 'timestamp': '2025-09-30 22:24:03.940938', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:04.002945', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.10414572805166245, 'timestamp': '2025-09-30 22:24:04.009407', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:04.070353', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.1365746557712555, 'timestamp': '2025-09-30 22:24:04.072952', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.131704', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.16891932487487793, 'timestamp': '2025-09-30 22:24:04.138053', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:04.197829', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.13856759667396545, 'timestamp': '2025-09-30 22:24:04.208760', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:04.266288', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.20789559185504913, 'timestamp': '2025-09-30 22:24:04.276782', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:04.345000', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.16531242430210114, 'timestamp': '2025-09-30 22:24:04.348782', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.408274', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.08491717278957367, 'timestamp': '2025-09-30 22:24:04.411189', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.468507', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.1220933049917221, 'timestamp': '2025-09-30 22:24:04.472143', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.531195', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.19709832966327667, 'timestamp': '2025-09-30 22:24:04.537347', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:04.597832', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.1396835446357727, 'timestamp': '2025-09-30 22:24:04.600378', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:04.660377', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.180097758769989, 'timestamp': '2025-09-30 22:24:04.669143', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.728285', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.1289704591035843, 'timestamp': '2025-09-30 22:24:04.741677', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.802786', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.0959290936589241, 'timestamp': '2025-09-30 22:24:04.808757', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.865862', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.08712048083543777, 'timestamp': '2025-09-30 22:24:04.870424', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:04.932174', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.0773782879114151, 'timestamp': '2025-09-30 22:24:04.937736', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:04.995364', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.11294439435005188, 'timestamp': '2025-09-30 22:24:04.999120', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:05.063706', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.12627463042736053, 'timestamp': '2025-09-30 22:24:05.083408', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.141775', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.1718922108411789, 'timestamp': '2025-09-30 22:24:05.145481', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:05.203238', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.0822567418217659, 'timestamp': '2025-09-30 22:24:05.206842', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:05.264374', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.12139596045017242, 'timestamp': '2025-09-30 22:24:05.268800', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:05.333461', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.21722884476184845, 'timestamp': '2025-09-30 22:24:05.344055', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:24:05.415122', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.21035224199295044, 'timestamp': '2025-09-30 22:24:05.420011', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.479057', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.20343895256519318, 'timestamp': '2025-09-30 22:24:05.483137', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:05.540289', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.207832932472229, 'timestamp': '2025-09-30 22:24:05.545634', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:05.605919', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.12911057472229004, 'timestamp': '2025-09-30 22:24:05.612144', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.681123', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.15848574042320251, 'timestamp': '2025-09-30 22:24:05.685451', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.742854', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.12946374714374542, 'timestamp': '2025-09-30 22:24:05.745755', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:05.813150', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.1559849977493286, 'timestamp': '2025-09-30 22:24:05.816999', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.885871', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.10312071442604065, 'timestamp': '2025-09-30 22:24:05.904626', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:05.972836', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.056361764669418335, 'timestamp': '2025-09-30 22:24:05.977850', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:06.040806', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.29797080159187317, 'timestamp': '2025-09-30 22:24:06.044314', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:06.109689', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.049536120146512985, 'timestamp': '2025-09-30 22:24:06.113374', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:06.173053', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.10115312039852142, 'timestamp': '2025-09-30 22:24:06.188069', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:06.261259', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.05349881947040558, 'timestamp': '2025-09-30 22:24:06.274613', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:06.349211', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.23030200600624084, 'timestamp': '2025-09-30 22:24:06.362062', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:06.438457', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.2058258205652237, 'timestamp': '2025-09-30 22:24:06.442756', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:06.521081', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.15847930312156677, 'timestamp': '2025-09-30 22:24:06.528825', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:06.595221', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.09868675470352173, 'timestamp': '2025-09-30 22:24:06.599405', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:06.656856', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.08662943542003632, 'timestamp': '2025-09-30 22:24:06.660441', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:06.717658', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.09621627628803253, 'timestamp': '2025-09-30 22:24:06.729032', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:06.787637', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.19292263686656952, 'timestamp': '2025-09-30 22:24:06.801156', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:06.857913', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.1541397124528885, 'timestamp': '2025-09-30 22:24:06.861658', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:24:06.920208', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.1047307476401329, 'timestamp': '2025-09-30 22:24:06.932741', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:06.993658', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.11287610232830048, 'timestamp': '2025-09-30 22:24:06.998055', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:07.061438', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.12902943789958954, 'timestamp': '2025-09-30 22:24:07.068351', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:07.145196', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.2287239134311676, 'timestamp': '2025-09-30 22:24:07.149574', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:07.209054', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.14125637710094452, 'timestamp': '2025-09-30 22:24:07.220276', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:07.289146', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.1645011156797409, 'timestamp': '2025-09-30 22:24:07.292828', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:24:07.354915', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.1561274230480194, 'timestamp': '2025-09-30 22:24:07.362605', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:07.423108', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.19887825846672058, 'timestamp': '2025-09-30 22:24:07.427784', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:07.488664', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.11126845329999924, 'timestamp': '2025-09-30 22:24:07.493928', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:07.555722', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.14631493389606476, 'timestamp': '2025-09-30 22:24:07.558751', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:07.616524', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.2642880380153656, 'timestamp': '2025-09-30 22:24:07.623356', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:07.683005', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.14107897877693176, 'timestamp': '2025-09-30 22:24:07.687306', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:07.747963', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.15433967113494873, 'timestamp': '2025-09-30 22:24:07.751741', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:07.811278', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.14398609101772308, 'timestamp': '2025-09-30 22:24:07.814640', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:07.877951', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.17262212932109833, 'timestamp': '2025-09-30 22:24:07.885048', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:07.943669', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.07212080806493759, 'timestamp': '2025-09-30 22:24:07.947238', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:08.007861', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.23661494255065918, 'timestamp': '2025-09-30 22:24:08.018602', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:08.086878', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.16455666720867157, 'timestamp': '2025-09-30 22:24:08.090239', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:08.149213', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.14889994263648987, 'timestamp': '2025-09-30 22:24:08.156113', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:08.225630', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.11777099967002869, 'timestamp': '2025-09-30 22:24:08.228589', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:08.294568', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.10494857281446457, 'timestamp': '2025-09-30 22:24:08.298036', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:08.361970', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.07819409668445587, 'timestamp': '2025-09-30 22:24:08.369180', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:08.435549', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.14671890437602997, 'timestamp': '2025-09-30 22:24:08.445751', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:08.505768', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.05310191959142685, 'timestamp': '2025-09-30 22:24:08.509407', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:08.576850', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.17665404081344604, 'timestamp': '2025-09-30 22:24:08.587276', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:08.651291', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.18005430698394775, 'timestamp': '2025-09-30 22:24:08.654833', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:08.714841', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.19475574791431427, 'timestamp': '2025-09-30 22:24:08.730230', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:08.798419', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.11114896833896637, 'timestamp': '2025-09-30 22:24:08.803187', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:08.869468', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.09315916150808334, 'timestamp': '2025-09-30 22:24:08.872853', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:08.941139', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.22158093750476837, 'timestamp': '2025-09-30 22:24:08.944447', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:09.012222', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.12976767122745514, 'timestamp': '2025-09-30 22:24:09.018536', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:09.079989', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.17065058648586273, 'timestamp': '2025-09-30 22:24:09.087540', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:09.146148', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.20458556711673737, 'timestamp': '2025-09-30 22:24:09.149742', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:09.209237', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.13188473880290985, 'timestamp': '2025-09-30 22:24:09.211952', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:09.273458', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.18810147047042847, 'timestamp': '2025-09-30 22:24:09.280716', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:09.355129', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.16327138245105743, 'timestamp': '2025-09-30 22:24:09.358197', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:09.416114', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.17911601066589355, 'timestamp': '2025-09-30 22:24:09.421766', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:09.482750', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.18340910971164703, 'timestamp': '2025-09-30 22:24:09.486199', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:09.547076', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.14975884556770325, 'timestamp': '2025-09-30 22:24:09.555541', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:09.630797', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10966581851243973, 'timestamp': '2025-09-30 22:24:09.634630', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:09.693680', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.18989920616149902, 'timestamp': '2025-09-30 22:24:09.696696', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:09.755660', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.2638881504535675, 'timestamp': '2025-09-30 22:24:09.758508', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:09.834021', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.14291998744010925, 'timestamp': '2025-09-30 22:24:09.840500', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:09.900197', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.2130444347858429, 'timestamp': '2025-09-30 22:24:09.903332', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:09.967976', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.0772867277264595, 'timestamp': '2025-09-30 22:24:09.971086', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.030093', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.11902279406785965, 'timestamp': '2025-09-30 22:24:10.038205', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.113752', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.16354699432849884, 'timestamp': '2025-09-30 22:24:10.130131', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:10.197124', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.1772027462720871, 'timestamp': '2025-09-30 22:24:10.199857', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:10.272984', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.17292927205562592, 'timestamp': '2025-09-30 22:24:10.277085', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:10.337386', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.12090259790420532, 'timestamp': '2025-09-30 22:24:10.340392', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:10.406170', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.14885921776294708, 'timestamp': '2025-09-30 22:24:10.418184', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:10.504961', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.11226602643728256, 'timestamp': '2025-09-30 22:24:10.509055', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:10.583304', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.14967286586761475, 'timestamp': '2025-09-30 22:24:10.586016', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.673400', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.16226212680339813, 'timestamp': '2025-09-30 22:24:10.677721', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:10.738650', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.11134561896324158, 'timestamp': '2025-09-30 22:24:10.744931', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.815210', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.13049077987670898, 'timestamp': '2025-09-30 22:24:10.817780', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.884127', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.06603602319955826, 'timestamp': '2025-09-30 22:24:10.890756', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:10.957457', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.1736828088760376, 'timestamp': '2025-09-30 22:24:10.964745', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:11.038315', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.14496378600597382, 'timestamp': '2025-09-30 22:24:11.045827', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:11.127558', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.1581416130065918, 'timestamp': '2025-09-30 22:24:11.134767', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:11.209150', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.07935454696416855, 'timestamp': '2025-09-30 22:24:11.213158', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:11.290555', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.19358722865581512, 'timestamp': '2025-09-30 22:24:11.301537', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:11.361350', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.19267378747463226, 'timestamp': '2025-09-30 22:24:11.367843', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:11.426968', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.1693856418132782, 'timestamp': '2025-09-30 22:24:11.438034', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:11.502089', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.10488338768482208, 'timestamp': '2025-09-30 22:24:11.505300', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:11.567097', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.24876978993415833, 'timestamp': '2025-09-30 22:24:11.571385', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:11.640902', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.05731930583715439, 'timestamp': '2025-09-30 22:24:11.651461', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:11.715007', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.1560823619365692, 'timestamp': '2025-09-30 22:24:11.717366', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:11.775670', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.1391422003507614, 'timestamp': '2025-09-30 22:24:11.778518', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:11.843906', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.19582541286945343, 'timestamp': '2025-09-30 22:24:11.847862', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:11.922859', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.0862564668059349, 'timestamp': '2025-09-30 22:24:11.936425', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:12.001438', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.11321033537387848, 'timestamp': '2025-09-30 22:24:12.004790', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.081172', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.10423741489648819, 'timestamp': '2025-09-30 22:24:12.087040', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:12.159144', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.13298766314983368, 'timestamp': '2025-09-30 22:24:12.165134', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.250000', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.12912195920944214, 'timestamp': '2025-09-30 22:24:12.259635', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.316842', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.1685071438550949, 'timestamp': '2025-09-30 22:24:12.322956', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.382563', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.1531801074743271, 'timestamp': '2025-09-30 22:24:12.385518', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.445072', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.25171950459480286, 'timestamp': '2025-09-30 22:24:12.448970', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.521922', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.14243994653224945, 'timestamp': '2025-09-30 22:24:12.528689', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:12.588618', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.16421861946582794, 'timestamp': '2025-09-30 22:24:12.597538', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:12.673705', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.1324840635061264, 'timestamp': '2025-09-30 22:24:12.678387', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:12.736819', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.25705230236053467, 'timestamp': '2025-09-30 22:24:12.741589', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:12.802692', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.1657714992761612, 'timestamp': '2025-09-30 22:24:12.811915', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:12.869774', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.26075276732444763, 'timestamp': '2025-09-30 22:24:12.872200', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:12.944285', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.15288090705871582, 'timestamp': '2025-09-30 22:24:12.947227', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:13.010817', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.11116234213113785, 'timestamp': '2025-09-30 22:24:13.013216', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:13.074127', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.08950086683034897, 'timestamp': '2025-09-30 22:24:13.080336', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:13.151782', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.13651327788829803, 'timestamp': '2025-09-30 22:24:13.158081', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:13.222267', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.1211155503988266, 'timestamp': '2025-09-30 22:24:13.224462', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:13.287083', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.1682073175907135, 'timestamp': '2025-09-30 22:24:13.289865', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:13.349818', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.11813124269247055, 'timestamp': '2025-09-30 22:24:13.359636', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:13.421071', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11685187369585037, 'timestamp': '2025-09-30 22:24:13.424134', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:13.480881', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.1828705370426178, 'timestamp': '2025-09-30 22:24:13.483855', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:13.541845', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.14702755212783813, 'timestamp': '2025-09-30 22:24:13.544901', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:13.606483', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.1173074021935463, 'timestamp': '2025-09-30 22:24:13.615324', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:13.683160', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.1329614371061325, 'timestamp': '2025-09-30 22:24:13.688811', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:13.749147', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.19277118146419525, 'timestamp': '2025-09-30 22:24:13.753984', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:13.814707', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.13674907386302948, 'timestamp': '2025-09-30 22:24:13.819096', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:13.878973', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.0795835331082344, 'timestamp': '2025-09-30 22:24:13.885328', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:13.947757', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.16140717267990112, 'timestamp': '2025-09-30 22:24:13.951225', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.027833', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.14383549988269806, 'timestamp': '2025-09-30 22:24:14.030596', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:14.111410', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.09922141581773758, 'timestamp': '2025-09-30 22:24:14.114078', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:14.171519', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.11577314883470535, 'timestamp': '2025-09-30 22:24:14.177914', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:14.250782', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.15966694056987762, 'timestamp': '2025-09-30 22:24:14.253147', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:14.316599', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.185962975025177, 'timestamp': '2025-09-30 22:24:14.320428', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.379564', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.10445734858512878, 'timestamp': '2025-09-30 22:24:14.384575', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:14.445659', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.16943301260471344, 'timestamp': '2025-09-30 22:24:14.451860', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.528858', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.1954520344734192, 'timestamp': '2025-09-30 22:24:14.533388', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.596816', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.25098279118537903, 'timestamp': '2025-09-30 22:24:14.600592', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:14.670565', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.20894472301006317, 'timestamp': '2025-09-30 22:24:14.676658', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:14.737427', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.1280244141817093, 'timestamp': '2025-09-30 22:24:14.751162', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.809834', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.14194288849830627, 'timestamp': '2025-09-30 22:24:14.812347', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:14.869880', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.16278409957885742, 'timestamp': '2025-09-30 22:24:14.872714', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:14.932416', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.144831582903862, 'timestamp': '2025-09-30 22:24:14.935889', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.005904', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.08759520947933197, 'timestamp': '2025-09-30 22:24:15.014847', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:15.074432', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.08662436902523041, 'timestamp': '2025-09-30 22:24:15.078632', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:15.138329', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.07689586281776428, 'timestamp': '2025-09-30 22:24:15.144295', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.204122', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.158293679356575, 'timestamp': '2025-09-30 22:24:15.206754', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:15.265584', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.2366456538438797, 'timestamp': '2025-09-30 22:24:15.272021', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:15.331644', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.15326325595378876, 'timestamp': '2025-09-30 22:24:15.334436', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.393085', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.18596865236759186, 'timestamp': '2025-09-30 22:24:15.396198', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:15.455411', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.26010066270828247, 'timestamp': '2025-09-30 22:24:15.457910', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.533966', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.13519686460494995, 'timestamp': '2025-09-30 22:24:15.542877', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:15.606475', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.0930439680814743, 'timestamp': '2025-09-30 22:24:15.612914', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.686767', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.12947170436382294, 'timestamp': '2025-09-30 22:24:15.689114', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.752302', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.160970076918602, 'timestamp': '2025-09-30 22:24:15.759304', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:15.818068', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.11945829540491104, 'timestamp': '2025-09-30 22:24:15.825598', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:15.883226', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.16550886631011963, 'timestamp': '2025-09-30 22:24:15.885791', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:15.946203', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.14134754240512848, 'timestamp': '2025-09-30 22:24:15.948953', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.006813', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.10975965112447739, 'timestamp': '2025-09-30 22:24:16.009562', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:16.075612', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.1025797501206398, 'timestamp': '2025-09-30 22:24:16.082651', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:16.149329', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.08165939152240753, 'timestamp': '2025-09-30 22:24:16.152219', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.222329', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.17208369076251984, 'timestamp': '2025-09-30 22:24:16.225415', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:16.295083', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.1784643828868866, 'timestamp': '2025-09-30 22:24:16.298245', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:16.367306', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.07955938577651978, 'timestamp': '2025-09-30 22:24:16.374162', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.452690', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.08245962858200073, 'timestamp': '2025-09-30 22:24:16.456557', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:16.522785', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.18080735206604004, 'timestamp': '2025-09-30 22:24:16.528047', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:16.586698', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.17512410879135132, 'timestamp': '2025-09-30 22:24:16.590138', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:16.648269', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.04347779601812363, 'timestamp': '2025-09-30 22:24:16.654342', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.711120', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.16215431690216064, 'timestamp': '2025-09-30 22:24:16.714774', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.775081', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.16777803003787994, 'timestamp': '2025-09-30 22:24:16.782645', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.841381', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.1617635041475296, 'timestamp': '2025-09-30 22:24:16.850909', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:16.913503', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.08739213645458221, 'timestamp': '2025-09-30 22:24:16.920387', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:16.977933', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.13514769077301025, 'timestamp': '2025-09-30 22:24:16.981336', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:17.044819', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.1681056171655655, 'timestamp': '2025-09-30 22:24:17.053449', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.113323', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.15408837795257568, 'timestamp': '2025-09-30 22:24:17.116380', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:17.173480', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.09203823655843735, 'timestamp': '2025-09-30 22:24:17.180815', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.238776', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.1704462617635727, 'timestamp': '2025-09-30 22:24:17.248571', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:17.312588', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.20809222757816315, 'timestamp': '2025-09-30 22:24:17.325461', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.384865', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.22478938102722168, 'timestamp': '2025-09-30 22:24:17.387725', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:17.460398', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.09855173528194427, 'timestamp': '2025-09-30 22:24:17.468644', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:17.535968', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.10987583547830582, 'timestamp': '2025-09-30 22:24:17.539019', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.598883', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.12493190169334412, 'timestamp': '2025-09-30 22:24:17.603146', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:17.662452', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.1470012068748474, 'timestamp': '2025-09-30 22:24:17.676409', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:17.744779', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.0598435141146183, 'timestamp': '2025-09-30 22:24:17.761431', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.829633', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.11665842682123184, 'timestamp': '2025-09-30 22:24:17.832342', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:17.909077', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.13292375206947327, 'timestamp': '2025-09-30 22:24:17.911911', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:17.971028', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.23247018456459045, 'timestamp': '2025-09-30 22:24:17.974480', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:18.033971', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.1662389487028122, 'timestamp': '2025-09-30 22:24:18.041354', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:18.128361', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.07324064522981644, 'timestamp': '2025-09-30 22:24:18.136271', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:18.201403', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.12114500999450684, 'timestamp': '2025-09-30 22:24:18.204246', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:18.264429', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.19628342986106873, 'timestamp': '2025-09-30 22:24:18.267187', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:18.325514', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.1602334827184677, 'timestamp': '2025-09-30 22:24:18.331649', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:18.389510', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.13771091401576996, 'timestamp': '2025-09-30 22:24:18.392229', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:18.464172', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.17336420714855194, 'timestamp': '2025-09-30 22:24:18.466749', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:18.525816', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.20051276683807373, 'timestamp': '2025-09-30 22:24:18.534357', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:18.609941', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.2064128816127777, 'timestamp': '2025-09-30 22:24:18.616700', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:18.675647', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.1391775906085968, 'timestamp': '2025-09-30 22:24:18.678927', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:18.738823', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.20441707968711853, 'timestamp': '2025-09-30 22:24:18.752992', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:18.811302', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.13692188262939453, 'timestamp': '2025-09-30 22:24:18.814379', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:18.874118', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.10148598253726959, 'timestamp': '2025-09-30 22:24:18.890195', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:18.954719', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.164618581533432, 'timestamp': '2025-09-30 22:24:18.957889', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.015251', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.2755517363548279, 'timestamp': '2025-09-30 22:24:19.017629', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:19.078480', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.06049492582678795, 'timestamp': '2025-09-30 22:24:19.081259', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.139376', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.08769523352384567, 'timestamp': '2025-09-30 22:24:19.145877', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.203299', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.09908530861139297, 'timestamp': '2025-09-30 22:24:19.206738', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.280650', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.12110275775194168, 'timestamp': '2025-09-30 22:24:19.283375', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:19.346694', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.21748676896095276, 'timestamp': '2025-09-30 22:24:19.349345', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.413027', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.08087556064128876, 'timestamp': '2025-09-30 22:24:19.422729', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:19.491552', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.22512470185756683, 'timestamp': '2025-09-30 22:24:19.496874', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.571743', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.18740105628967285, 'timestamp': '2025-09-30 22:24:19.575390', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:19.648205', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.08460614830255508, 'timestamp': '2025-09-30 22:24:19.651349', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.727875', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.16070972383022308, 'timestamp': '2025-09-30 22:24:19.733901', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:19.792192', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.1906125694513321, 'timestamp': '2025-09-30 22:24:19.794795', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:19.852612', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.11378723382949829, 'timestamp': '2025-09-30 22:24:19.855900', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:19.913194', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.11895577609539032, 'timestamp': '2025-09-30 22:24:19.916190', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:19.976115', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.0645696148276329, 'timestamp': '2025-09-30 22:24:19.983214', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:20.056260', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.12553100287914276, 'timestamp': '2025-09-30 22:24:20.059301', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:20.116339', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.138056680560112, 'timestamp': '2025-09-30 22:24:20.119159', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:20.191510', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.06953893601894379, 'timestamp': '2025-09-30 22:24:20.194356', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:20.265418', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.15119487047195435, 'timestamp': '2025-09-30 22:24:20.272110', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:20.333444', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.20127862691879272, 'timestamp': '2025-09-30 22:24:20.343252', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:20.401510', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.05373290181159973, 'timestamp': '2025-09-30 22:24:20.407234', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:20.465531', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.17501302063465118, 'timestamp': '2025-09-30 22:24:20.467974', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:20.537784', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.1869828850030899, 'timestamp': '2025-09-30 22:24:20.544010', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:20.601510', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.05872749164700508, 'timestamp': '2025-09-30 22:24:20.614579', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:20.673860', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11636880040168762, 'timestamp': '2025-09-30 22:24:20.680786', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:20.744777', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.1539774388074875, 'timestamp': '2025-09-30 22:24:20.747413', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:20.808171', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.17285028100013733, 'timestamp': '2025-09-30 22:24:20.814627', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:20.872735', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.12005016207695007, 'timestamp': '2025-09-30 22:24:20.880247', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:20.953771', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.22053681313991547, 'timestamp': '2025-09-30 22:24:20.956866', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:21.015279', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.16298072040081024, 'timestamp': '2025-09-30 22:24:21.018049', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:21.102496', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.100266233086586, 'timestamp': '2025-09-30 22:24:21.109656', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.167442', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.11484525352716446, 'timestamp': '2025-09-30 22:24:21.170432', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:21.230139', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.21782374382019043, 'timestamp': '2025-09-30 22:24:21.235048', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:21.307680', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.169413760304451, 'timestamp': '2025-09-30 22:24:21.313287', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.373257', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.11129774898290634, 'timestamp': '2025-09-30 22:24:21.382841', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.442231', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.13501065969467163, 'timestamp': '2025-09-30 22:24:21.447398', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:21.507876', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.2162379026412964, 'timestamp': '2025-09-30 22:24:21.514314', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:21.576253', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.08475405722856522, 'timestamp': '2025-09-30 22:24:21.579226', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.638137', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.1242009699344635, 'timestamp': '2025-09-30 22:24:21.644179', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:21.705762', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.07659067958593369, 'timestamp': '2025-09-30 22:24:21.708290', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:21.771258', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.08544457703828812, 'timestamp': '2025-09-30 22:24:21.773888', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:21.836240', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.16961458325386047, 'timestamp': '2025-09-30 22:24:21.842733', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.911512', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.12997092306613922, 'timestamp': '2025-09-30 22:24:21.917851', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:21.978990', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.15368513762950897, 'timestamp': '2025-09-30 22:24:21.986264', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:22.047612', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.10577552020549774, 'timestamp': '2025-09-30 22:24:22.051467', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:22.110123', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.20809435844421387, 'timestamp': '2025-09-30 22:24:22.112627', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:22.173680', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.08374366909265518, 'timestamp': '2025-09-30 22:24:22.180453', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.242390', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.06340892612934113, 'timestamp': '2025-09-30 22:24:22.246063', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.304359', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.14501342177391052, 'timestamp': '2025-09-30 22:24:22.307453', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:22.366125', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.1642104685306549, 'timestamp': '2025-09-30 22:24:22.369024', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:22.426660', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.16307127475738525, 'timestamp': '2025-09-30 22:24:22.433066', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.514398', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.1639225333929062, 'timestamp': '2025-09-30 22:24:22.521860', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.579856', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.15071633458137512, 'timestamp': '2025-09-30 22:24:22.586469', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.661833', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.1390998512506485, 'timestamp': '2025-09-30 22:24:22.664700', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:22.722308', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.19403088092803955, 'timestamp': '2025-09-30 22:24:22.736596', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:22.792874', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.08518163859844208, 'timestamp': '2025-09-30 22:24:22.795976', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.855452', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.1236763745546341, 'timestamp': '2025-09-30 22:24:22.858607', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:22.921713', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.10333968698978424, 'timestamp': '2025-09-30 22:24:22.927330', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:22.988821', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.13640065491199493, 'timestamp': '2025-09-30 22:24:22.997315', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:23.053862', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.17495089769363403, 'timestamp': '2025-09-30 22:24:23.063578', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:23.120982', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.1406995803117752, 'timestamp': '2025-09-30 22:24:23.123414', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:23.181651', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.14131051301956177, 'timestamp': '2025-09-30 22:24:23.184437', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:23.247486', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.10522633790969849, 'timestamp': '2025-09-30 22:24:23.253590', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:23.326644', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.1783389151096344, 'timestamp': '2025-09-30 22:24:23.330510', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:23.400396', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.12595036625862122, 'timestamp': '2025-09-30 22:24:23.403106', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:23.463676', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.10248561948537827, 'timestamp': '2025-09-30 22:24:23.467756', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:23.525650', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.10191389173269272, 'timestamp': '2025-09-30 22:24:23.539570', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:23.613294', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.20458346605300903, 'timestamp': '2025-09-30 22:24:23.617012', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:23.687760', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.08562502264976501, 'timestamp': '2025-09-30 22:24:23.694635', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:23.759209', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.27352210879325867, 'timestamp': '2025-09-30 22:24:23.762888', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:23.822306', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.1644003987312317, 'timestamp': '2025-09-30 22:24:23.828878', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:23.891249', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.10352055728435516, 'timestamp': '2025-09-30 22:24:23.895509', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:23.955450', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.12166747450828552, 'timestamp': '2025-09-30 22:24:23.961484', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.027301', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.19849170744419098, 'timestamp': '2025-09-30 22:24:24.042550', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.103413', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.1310058981180191, 'timestamp': '2025-09-30 22:24:24.110583', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.169728', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.07822021842002869, 'timestamp': '2025-09-30 22:24:24.179539', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:24.243741', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.12620678544044495, 'timestamp': '2025-09-30 22:24:24.247638', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:24.323702', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.08994048833847046, 'timestamp': '2025-09-30 22:24:24.334993', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:24.394338', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.096424400806427, 'timestamp': '2025-09-30 22:24:24.401331', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.470761', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.19107334315776825, 'timestamp': '2025-09-30 22:24:24.473851', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:24.537123', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.14006508886814117, 'timestamp': '2025-09-30 22:24:24.555410', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:24.615165', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.2721353769302368, 'timestamp': '2025-09-30 22:24:24.619539', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.678727', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.17137376964092255, 'timestamp': '2025-09-30 22:24:24.695215', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:24.755151', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.1702975332736969, 'timestamp': '2025-09-30 22:24:24.758971', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.829006', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.08751284331083298, 'timestamp': '2025-09-30 22:24:24.833307', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.906932', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.17328032851219177, 'timestamp': '2025-09-30 22:24:24.911362', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:24.986471', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.1624894142150879, 'timestamp': '2025-09-30 22:24:24.995317', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:25.056532', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.1568010151386261, 'timestamp': '2025-09-30 22:24:25.061378', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:25.137135', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.15966486930847168, 'timestamp': '2025-09-30 22:24:25.141838', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.218863', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.1383162885904312, 'timestamp': '2025-09-30 22:24:25.233506', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:25.294969', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.14194846153259277, 'timestamp': '2025-09-30 22:24:25.302765', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.375565', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.13387195765972137, 'timestamp': '2025-09-30 22:24:25.378992', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:25.447327', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.10383433103561401, 'timestamp': '2025-09-30 22:24:25.451427', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:25.534746', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.17409752309322357, 'timestamp': '2025-09-30 22:24:25.538034', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.607614', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.1312108188867569, 'timestamp': '2025-09-30 22:24:25.615973', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.687779', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.04041694477200508, 'timestamp': '2025-09-30 22:24:25.692685', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:25.759942', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.12985217571258545, 'timestamp': '2025-09-30 22:24:25.763362', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:25.824736', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.15562696754932404, 'timestamp': '2025-09-30 22:24:25.828227', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.890096', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.13671308755874634, 'timestamp': '2025-09-30 22:24:25.898149', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:25.980149', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.18438617885112762, 'timestamp': '2025-09-30 22:24:25.985248', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:26.047966', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.16780485212802887, 'timestamp': '2025-09-30 22:24:26.051880', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.127214', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.08677221834659576, 'timestamp': '2025-09-30 22:24:26.140691', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:26.211066', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.15890097618103027, 'timestamp': '2025-09-30 22:24:26.218754', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:26.303431', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.09571784734725952, 'timestamp': '2025-09-30 22:24:26.316423', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.377094', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.05140833929181099, 'timestamp': '2025-09-30 22:24:26.380473', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:26.442245', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.2608809471130371, 'timestamp': '2025-09-30 22:24:26.449442', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:26.509449', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.10791939496994019, 'timestamp': '2025-09-30 22:24:26.516504', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.578388', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.18309618532657623, 'timestamp': '2025-09-30 22:24:26.581292', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.656957', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.18408752977848053, 'timestamp': '2025-09-30 22:24:26.669340', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:26.751499', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.13547050952911377, 'timestamp': '2025-09-30 22:24:26.756904', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.837495', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.19892898201942444, 'timestamp': '2025-09-30 22:24:26.852696', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:26.918860', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.12388791888952255, 'timestamp': '2025-09-30 22:24:26.921927', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:26.993263', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.1704293191432953, 'timestamp': '2025-09-30 22:24:27.006288', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:27.071991', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.0664481669664383, 'timestamp': '2025-09-30 22:24:27.083644', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:27.154411', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.16056767106056213, 'timestamp': '2025-09-30 22:24:27.161933', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:27.222553', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.11808890104293823, 'timestamp': '2025-09-30 22:24:27.226470', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:27.285222', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.2430141717195511, 'timestamp': '2025-09-30 22:24:27.289122', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:27.353080', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.11583193391561508, 'timestamp': '2025-09-30 22:24:27.365039', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:27.435129', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.15819036960601807, 'timestamp': '2025-09-30 22:24:27.443463', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:27.504323', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.12241273373365402, 'timestamp': '2025-09-30 22:24:27.508200', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:27.573454', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.17592458426952362, 'timestamp': '2025-09-30 22:24:27.578039', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:27.655587', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.1280277520418167, 'timestamp': '2025-09-30 22:24:27.670505', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:27.738306', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.1078057810664177, 'timestamp': '2025-09-30 22:24:27.747858', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:27.812194', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.09035452455282211, 'timestamp': '2025-09-30 22:24:27.818577', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:27.899614', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.17325849831104279, 'timestamp': '2025-09-30 22:24:27.905634', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:27.974787', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.13266396522521973, 'timestamp': '2025-09-30 22:24:27.982735', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:28.045271', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.14119422435760498, 'timestamp': '2025-09-30 22:24:28.056230', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:28.116169', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.1429256796836853, 'timestamp': '2025-09-30 22:24:28.120642', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:28.187908', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.28580185770988464, 'timestamp': '2025-09-30 22:24:28.194373', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:28.260805', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.18526794016361237, 'timestamp': '2025-09-30 22:24:28.264433', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:28.325263', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.2001590132713318, 'timestamp': '2025-09-30 22:24:28.332398', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:28.394415', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.23476575314998627, 'timestamp': '2025-09-30 22:24:28.409094', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:28.483118', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.06620810925960541, 'timestamp': '2025-09-30 22:24:28.492028', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:28.552084', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.15548889338970184, 'timestamp': '2025-09-30 22:24:28.555221', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:28.638876', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.09218977391719818, 'timestamp': '2025-09-30 22:24:28.647172', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:28.706724', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.12202746421098709, 'timestamp': '2025-09-30 22:24:28.712147', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:28.770830', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.13008925318717957, 'timestamp': '2025-09-30 22:24:28.775305', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:28.840998', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.10367656499147415, 'timestamp': '2025-09-30 22:24:28.849105', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:28.936233', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.14455091953277588, 'timestamp': '2025-09-30 22:24:28.951205', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:29.035552', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.13227035105228424, 'timestamp': '2025-09-30 22:24:29.041263', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:29.099221', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.11672090739011765, 'timestamp': '2025-09-30 22:24:29.102926', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:29.164444', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.1413399577140808, 'timestamp': '2025-09-30 22:24:29.181974', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:29.250328', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.27850887179374695, 'timestamp': '2025-09-30 22:24:29.257636', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:29.323625', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.09390152990818024, 'timestamp': '2025-09-30 22:24:29.327064', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:29.390925', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.11771266907453537, 'timestamp': '2025-09-30 22:24:29.412365', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:29.486347', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.11084399372339249, 'timestamp': '2025-09-30 22:24:29.490524', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:29.549890', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.11378435790538788, 'timestamp': '2025-09-30 22:24:29.563618', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:29.631690', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.15046396851539612, 'timestamp': '2025-09-30 22:24:29.636332', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:29.695008', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.18304337561130524, 'timestamp': '2025-09-30 22:24:29.704282', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:29.767850', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.1276666820049286, 'timestamp': '2025-09-30 22:24:29.771451', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:29.832984', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.10235162824392319, 'timestamp': '2025-09-30 22:24:29.847704', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:29.916844', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.16753263771533966, 'timestamp': '2025-09-30 22:24:29.925997', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:29.990595', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12738075852394104, 'timestamp': '2025-09-30 22:24:29.994310', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:30.070695', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.1469712108373642, 'timestamp': '2025-09-30 22:24:30.075163', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:30.147252', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.1529317945241928, 'timestamp': '2025-09-30 22:24:30.159772', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.227593', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.12001106888055801, 'timestamp': '2025-09-30 22:24:30.230505', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:30.289586', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.07063900679349899, 'timestamp': '2025-09-30 22:24:30.297238', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:24:30.373204', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.24126005172729492, 'timestamp': '2025-09-30 22:24:30.376840', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:30.442292', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.13995477557182312, 'timestamp': '2025-09-30 22:24:30.450375', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:30.510672', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.22480547428131104, 'timestamp': '2025-09-30 22:24:30.521345', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.580746', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.15643805265426636, 'timestamp': '2025-09-30 22:24:30.583802', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.642351', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.21735993027687073, 'timestamp': '2025-09-30 22:24:30.647586', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.730474', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.15209481120109558, 'timestamp': '2025-09-30 22:24:30.736776', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.795107', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.19946305453777313, 'timestamp': '2025-09-30 22:24:30.798733', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:30.856801', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.12074963748455048, 'timestamp': '2025-09-30 22:24:30.860068', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:30.918249', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.146369069814682, 'timestamp': '2025-09-30 22:24:30.921634', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:30.982653', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.221361443400383, 'timestamp': '2025-09-30 22:24:30.991562', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:31.052664', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.10351166874170303, 'timestamp': '2025-09-30 22:24:31.057412', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:31.140903', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.22285926342010498, 'timestamp': '2025-09-30 22:24:31.144909', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:31.211817', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.21714679896831512, 'timestamp': '2025-09-30 22:24:31.215351', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:31.276139', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.126314178109169, 'timestamp': '2025-09-30 22:24:31.283982', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:24:47.173965', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 11177.459131948213, 'timestamp': '2025-09-30 22:24:47.179284', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:47.240717', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.10975033044815063, 'timestamp': '2025-09-30 22:24:47.246109', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:47.305261', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.11767338216304779, 'timestamp': '2025-09-30 22:24:47.308929', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:47.386807', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.08888646215200424, 'timestamp': '2025-09-30 22:24:47.389635', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:47.450837', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.22579650580883026, 'timestamp': '2025-09-30 22:24:47.457990', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:47.534890', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.08419172465801239, 'timestamp': '2025-09-30 22:24:47.552580', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:47.610836', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.10720384120941162, 'timestamp': '2025-09-30 22:24:47.616296', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:47.685106', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.16419322788715363, 'timestamp': '2025-09-30 22:24:47.690779', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:47.748892', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.09257373213768005, 'timestamp': '2025-09-30 22:24:47.755697', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:47.812818', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.17154373228549957, 'timestamp': '2025-09-30 22:24:47.824998', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:47.894615', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.21789340674877167, 'timestamp': '2025-09-30 22:24:47.898313', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:47.960792', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.17172370851039886, 'timestamp': '2025-09-30 22:24:47.963918', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.022662', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.17086492478847504, 'timestamp': '2025-09-30 22:24:48.029371', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.086349', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.13756462931632996, 'timestamp': '2025-09-30 22:24:48.090126', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.159945', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.15761521458625793, 'timestamp': '2025-09-30 22:24:48.174940', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:48.244801', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.16699762642383575, 'timestamp': '2025-09-30 22:24:48.257754', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.317796', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.08405579626560211, 'timestamp': '2025-09-30 22:24:48.340984', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:48.402245', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.12517288327217102, 'timestamp': '2025-09-30 22:24:48.406352', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:48.466141', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.07793471217155457, 'timestamp': '2025-09-30 22:24:48.486238', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.549882', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.16184969246387482, 'timestamp': '2025-09-30 22:24:48.556276', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:48.633991', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.16310645639896393, 'timestamp': '2025-09-30 22:24:48.643886', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:48.703761', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.17343172430992126, 'timestamp': '2025-09-30 22:24:48.728110', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:48.793761', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.12397512048482895, 'timestamp': '2025-09-30 22:24:48.807019', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:48.867074', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.18938705325126648, 'timestamp': '2025-09-30 22:24:48.872795', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:48.933557', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.1737314611673355, 'timestamp': '2025-09-30 22:24:48.940918', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.000199', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.09586435556411743, 'timestamp': '2025-09-30 22:24:49.005629', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:49.064085', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.07115201652050018, 'timestamp': '2025-09-30 22:24:49.069327', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.128818', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.101438969373703, 'timestamp': '2025-09-30 22:24:49.133250', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.193774', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.1079145073890686, 'timestamp': '2025-09-30 22:24:49.200911', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.279094', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.17228420078754425, 'timestamp': '2025-09-30 22:24:49.283837', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:49.351770', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.13845999538898468, 'timestamp': '2025-09-30 22:24:49.356841', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.427611', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.20225286483764648, 'timestamp': '2025-09-30 22:24:49.442269', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.515270', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.1580009162425995, 'timestamp': '2025-09-30 22:24:49.534941', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.592952', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.11266171932220459, 'timestamp': '2025-09-30 22:24:49.596332', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:49.653983', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.12131886184215546, 'timestamp': '2025-09-30 22:24:49.658874', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.716620', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.1412777602672577, 'timestamp': '2025-09-30 22:24:49.729241', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.788779', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.12081344425678253, 'timestamp': '2025-09-30 22:24:49.804045', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:49.869849', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.14083942770957947, 'timestamp': '2025-09-30 22:24:49.873564', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:49.949923', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.10576377809047699, 'timestamp': '2025-09-30 22:24:49.953099', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:50.012138', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.11746906489133835, 'timestamp': '2025-09-30 22:24:50.015182', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:50.074471', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.1451278030872345, 'timestamp': '2025-09-30 22:24:50.081115', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:50.139214', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.09735946357250214, 'timestamp': '2025-09-30 22:24:50.143308', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:50.210340', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.1588011234998703, 'timestamp': '2025-09-30 22:24:50.213878', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:50.273376', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.11816547811031342, 'timestamp': '2025-09-30 22:24:50.289503', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:50.348578', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.2058621048927307, 'timestamp': '2025-09-30 22:24:50.356550', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:50.427644', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.13280263543128967, 'timestamp': '2025-09-30 22:24:50.430539', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:50.495271', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.0877663865685463, 'timestamp': '2025-09-30 22:24:50.500193', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:50.566341', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.17371320724487305, 'timestamp': '2025-09-30 22:24:50.575669', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:50.637274', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.1890551596879959, 'timestamp': '2025-09-30 22:24:50.644649', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:50.718574', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.13530488312244415, 'timestamp': '2025-09-30 22:24:50.735036', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:50.797485', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.16390477120876312, 'timestamp': '2025-09-30 22:24:50.814621', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:50.884387', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.17890892922878265, 'timestamp': '2025-09-30 22:24:50.889088', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:50.948986', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.14107643067836761, 'timestamp': '2025-09-30 22:24:50.968609', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:51.042659', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.11115555465221405, 'timestamp': '2025-09-30 22:24:51.047086', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.118137', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.12024171650409698, 'timestamp': '2025-09-30 22:24:51.122019', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:51.183909', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.07862070947885513, 'timestamp': '2025-09-30 22:24:51.188429', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:51.249832', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.19202198088169098, 'timestamp': '2025-09-30 22:24:51.270711', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:51.349116', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.12997590005397797, 'timestamp': '2025-09-30 22:24:51.353358', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.420844', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.06695600599050522, 'timestamp': '2025-09-30 22:24:51.425442', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:51.496326', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.10606877505779266, 'timestamp': '2025-09-30 22:24:51.499781', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:51.559294', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.16769950091838837, 'timestamp': '2025-09-30 22:24:51.579258', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.646739', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.21835386753082275, 'timestamp': '2025-09-30 22:24:51.650868', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.710816', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.12278527766466141, 'timestamp': '2025-09-30 22:24:51.715909', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.773840', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.07509679347276688, 'timestamp': '2025-09-30 22:24:51.777170', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:51.837508', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.17926627397537231, 'timestamp': '2025-09-30 22:24:51.845979', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:51.907898', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.11917317658662796, 'timestamp': '2025-09-30 22:24:51.912792', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:51.972071', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.18496744334697723, 'timestamp': '2025-09-30 22:24:51.977164', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:52.034693', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.13804759085178375, 'timestamp': '2025-09-30 22:24:52.038220', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:52.107331', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.12913358211517334, 'timestamp': '2025-09-30 22:24:52.113826', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.175448', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.1332341730594635, 'timestamp': '2025-09-30 22:24:52.178466', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.241805', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.15834379196166992, 'timestamp': '2025-09-30 22:24:52.244821', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:52.304615', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.14148913323879242, 'timestamp': '2025-09-30 22:24:52.307342', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:52.376796', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.12204801291227341, 'timestamp': '2025-09-30 22:24:52.383743', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.449435', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.1374662220478058, 'timestamp': '2025-09-30 22:24:52.452332', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.520413', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.1531326174736023, 'timestamp': '2025-09-30 22:24:52.523444', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:52.586596', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.2289462387561798, 'timestamp': '2025-09-30 22:24:52.589883', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.659348', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.0934511125087738, 'timestamp': '2025-09-30 22:24:52.666456', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:52.724337', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.11931740492582321, 'timestamp': '2025-09-30 22:24:52.738944', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:52.820677', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.13498292863368988, 'timestamp': '2025-09-30 22:24:52.829016', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:52.889372', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.17022037506103516, 'timestamp': '2025-09-30 22:24:52.898524', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:52.965356', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.08905556797981262, 'timestamp': '2025-09-30 22:24:52.972274', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:53.030987', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.1556512415409088, 'timestamp': '2025-09-30 22:24:53.038559', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:53.098277', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.08080466091632843, 'timestamp': '2025-09-30 22:24:53.105606', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:53.174278', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.21069495379924774, 'timestamp': '2025-09-30 22:24:53.177597', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:53.235210', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.15775080025196075, 'timestamp': '2025-09-30 22:24:53.242593', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:53.306267', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.19368545711040497, 'timestamp': '2025-09-30 22:24:53.313892', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:53.372475', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.10841534286737442, 'timestamp': '2025-09-30 22:24:53.375082', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:53.433915', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.06724997609853745, 'timestamp': '2025-09-30 22:24:53.436446', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:53.500698', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.213456928730011, 'timestamp': '2025-09-30 22:24:53.507075', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:53.579117', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.09329860657453537, 'timestamp': '2025-09-30 22:24:53.588025', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:53.647345', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.10106628388166428, 'timestamp': '2025-09-30 22:24:53.650373', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:53.718088', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.09074454009532928, 'timestamp': '2025-09-30 22:24:53.728351', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:53.787845', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.15327250957489014, 'timestamp': '2025-09-30 22:24:53.795567', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:53.855728', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.1139947697520256, 'timestamp': '2025-09-30 22:24:53.859731', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:53.919592', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.24758483469486237, 'timestamp': '2025-09-30 22:24:53.924019', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:54.010679', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.14950819313526154, 'timestamp': '2025-09-30 22:24:54.013877', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.072962', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.12405131012201309, 'timestamp': '2025-09-30 22:24:54.079482', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:54.138325', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.10597171634435654, 'timestamp': '2025-09-30 22:24:54.141115', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:54.204642', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.16178080439567566, 'timestamp': '2025-09-30 22:24:54.212781', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.278086', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.10198037326335907, 'timestamp': '2025-09-30 22:24:54.281141', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.340854', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.22573712468147278, 'timestamp': '2025-09-30 22:24:54.347440', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.407307', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.17752014100551605, 'timestamp': '2025-09-30 22:24:54.410119', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:54.478215', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.12038438022136688, 'timestamp': '2025-09-30 22:24:54.481479', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:54.542902', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.25988155603408813, 'timestamp': '2025-09-30 22:24:54.545847', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.614791', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.13134263455867767, 'timestamp': '2025-09-30 22:24:54.622103', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:54.685440', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.23978835344314575, 'timestamp': '2025-09-30 22:24:54.688050', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:54.755230', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.13223890960216522, 'timestamp': '2025-09-30 22:24:54.758685', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:54.818217', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.2274978756904602, 'timestamp': '2025-09-30 22:24:54.821153', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:54.883894', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.1793425977230072, 'timestamp': '2025-09-30 22:24:54.890111', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:54.948199', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.15316569805145264, 'timestamp': '2025-09-30 22:24:54.951252', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:55.008634', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.09355657547712326, 'timestamp': '2025-09-30 22:24:55.011835', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:55.075807', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.11367645114660263, 'timestamp': '2025-09-30 22:24:55.078443', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:55.137112', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.151663139462471, 'timestamp': '2025-09-30 22:24:55.146863', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-09-30 22:24:55.583569', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:55.647288', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.22309178113937378, 'timestamp': '2025-09-30 22:24:55.654496', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:55.733979', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.2192525714635849, 'timestamp': '2025-09-30 22:24:55.737096', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:55.794144', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.11870454996824265, 'timestamp': '2025-09-30 22:24:55.797329', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:55.866187', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.18931356072425842, 'timestamp': '2025-09-30 22:24:55.872869', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:55.930128', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.15667247772216797, 'timestamp': '2025-09-30 22:24:55.932582', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:55.997185', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.07471952587366104, 'timestamp': '2025-09-30 22:24:56.000772', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:56.071116', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.17387127876281738, 'timestamp': '2025-09-30 22:24:56.073911', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:56.133981', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.13000331819057465, 'timestamp': '2025-09-30 22:24:56.140258', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:56.207070', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.14134280383586884, 'timestamp': '2025-09-30 22:24:56.210499', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:56.282607', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.09208163619041443, 'timestamp': '2025-09-30 22:24:56.285756', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:56.344758', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.10038125514984131, 'timestamp': '2025-09-30 22:24:56.347353', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:56.406206', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.0969763994216919, 'timestamp': '2025-09-30 22:24:56.412926', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:56.476649', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.12167609483003616, 'timestamp': '2025-09-30 22:24:56.479133', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:56.541496', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.11184925585985184, 'timestamp': '2025-09-30 22:24:56.544870', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:56.602992', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.0946270301938057, 'timestamp': '2025-09-30 22:24:56.607890', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:24:56.666077', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.20152243971824646, 'timestamp': '2025-09-30 22:24:56.672048', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:56.733726', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.23644021153450012, 'timestamp': '2025-09-30 22:24:56.746092', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:56.805318', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.09165297448635101, 'timestamp': '2025-09-30 22:24:56.808283', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:56.866478', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.1100388765335083, 'timestamp': '2025-09-30 22:24:56.869627', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:56.926532', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.20057803392410278, 'timestamp': '2025-09-30 22:24:56.934285', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.015337', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.28113025426864624, 'timestamp': '2025-09-30 22:24:57.017767', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.076721', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.09903792291879654, 'timestamp': '2025-09-30 22:24:57.086912', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.145898', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.1486896127462387, 'timestamp': '2025-09-30 22:24:57.150022', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.211204', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.16110990941524506, 'timestamp': '2025-09-30 22:24:57.218053', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.276623', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.15382370352745056, 'timestamp': '2025-09-30 22:24:57.291983', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.365230', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.157961905002594, 'timestamp': '2025-09-30 22:24:57.368134', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:57.432808', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.12644436955451965, 'timestamp': '2025-09-30 22:24:57.436522', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.495359', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.1609191596508026, 'timestamp': '2025-09-30 22:24:57.504371', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.606106', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.13136331737041473, 'timestamp': '2025-09-30 22:24:57.610473', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.669537', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.14638356864452362, 'timestamp': '2025-09-30 22:24:57.673234', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.731750', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.14942193031311035, 'timestamp': '2025-09-30 22:24:57.735666', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:57.794532', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.14944791793823242, 'timestamp': '2025-09-30 22:24:57.800867', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.866836', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.1498165875673294, 'timestamp': '2025-09-30 22:24:57.869931', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:57.926537', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.16839128732681274, 'timestamp': '2025-09-30 22:24:57.928990', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:57.997145', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.1366988718509674, 'timestamp': '2025-09-30 22:24:58.000304', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:58.060750', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.09803096950054169, 'timestamp': '2025-09-30 22:24:58.066984', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:58.124572', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.18053700029850006, 'timestamp': '2025-09-30 22:24:58.127184', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:58.184194', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.12772202491760254, 'timestamp': '2025-09-30 22:24:58.187251', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:58.245397', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.06747733056545258, 'timestamp': '2025-09-30 22:24:58.248305', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:58.307126', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.24092520773410797, 'timestamp': '2025-09-30 22:24:58.313532', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:58.375903', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.08853590488433838, 'timestamp': '2025-09-30 22:24:58.378438', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:58.436562', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.05676327645778656, 'timestamp': '2025-09-30 22:24:58.438968', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:58.496946', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.2508656680583954, 'timestamp': '2025-09-30 22:24:58.499840', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:58.572430', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.06460213661193848, 'timestamp': '2025-09-30 22:24:58.577959', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:58.637196', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.15343819558620453, 'timestamp': '2025-09-30 22:24:58.639965', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:58.697959', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.11046912521123886, 'timestamp': '2025-09-30 22:24:58.701162', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:58.758500', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.14598457515239716, 'timestamp': '2025-09-30 22:24:58.761020', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:58.818511', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.13037656247615814, 'timestamp': '2025-09-30 22:24:58.826105', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:58.882278', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.07104227691888809, 'timestamp': '2025-09-30 22:24:58.884498', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:58.953451', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.11976984888315201, 'timestamp': '2025-09-30 22:24:58.956044', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:59.015613', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.10747913271188736, 'timestamp': '2025-09-30 22:24:59.018253', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:24:59.080741', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.11223708838224411, 'timestamp': '2025-09-30 22:24:59.087253', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:59.147779', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.15671682357788086, 'timestamp': '2025-09-30 22:24:59.151267', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:59.210033', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.10535770654678345, 'timestamp': '2025-09-30 22:24:59.212969', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:59.271894', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.16638869047164917, 'timestamp': '2025-09-30 22:24:59.274810', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:59.333271', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.1275770366191864, 'timestamp': '2025-09-30 22:24:59.341884', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:59.402790', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.18568632006645203, 'timestamp': '2025-09-30 22:24:59.405106', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:59.462408', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.18385235965251923, 'timestamp': '2025-09-30 22:24:59.465934', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:59.523994', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.14298021793365479, 'timestamp': '2025-09-30 22:24:59.527473', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:24:59.585160', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.09353948384523392, 'timestamp': '2025-09-30 22:24:59.591604', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:24:59.653053', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.09721693396568298, 'timestamp': '2025-09-30 22:24:59.659618', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:59.721861', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.08697828650474548, 'timestamp': '2025-09-30 22:24:59.725302', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:24:59.782447', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.188999742269516, 'timestamp': '2025-09-30 22:24:59.785097', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:24:59.847462', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.10347342491149902, 'timestamp': '2025-09-30 22:24:59.853960', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:59.918861', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.15517771244049072, 'timestamp': '2025-09-30 22:24:59.921832', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:24:59.984582', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.12807489931583405, 'timestamp': '2025-09-30 22:24:59.988431', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:00.063834', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.18149365484714508, 'timestamp': '2025-09-30 22:25:00.067497', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:00.125729', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.18210428953170776, 'timestamp': '2025-09-30 22:25:00.132817', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:00.193778', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.1098230704665184, 'timestamp': '2025-09-30 22:25:00.196981', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:00.255204', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.14383703470230103, 'timestamp': '2025-09-30 22:25:00.258001', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:00.315807', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.13625064492225647, 'timestamp': '2025-09-30 22:25:00.318410', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:00.376541', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.14027035236358643, 'timestamp': '2025-09-30 22:25:00.384868', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:00.448705', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.17262931168079376, 'timestamp': '2025-09-30 22:25:00.451469', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:00.509545', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.15001672506332397, 'timestamp': '2025-09-30 22:25:00.512179', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:00.569679', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.09053747355937958, 'timestamp': '2025-09-30 22:25:00.572766', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:00.630272', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.10597409307956696, 'timestamp': '2025-09-30 22:25:00.636819', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:00.712053', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.11017251759767532, 'timestamp': '2025-09-30 22:25:00.718521', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:00.775923', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.20380422472953796, 'timestamp': '2025-09-30 22:25:00.778733', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:00.836589', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.1750304400920868, 'timestamp': '2025-09-30 22:25:00.842354', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:00.903372', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.09351895749568939, 'timestamp': '2025-09-30 22:25:00.909366', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:00.966594', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.19923274219036102, 'timestamp': '2025-09-30 22:25:00.974908', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:01.036694', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.15122631192207336, 'timestamp': '2025-09-30 22:25:01.040214', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:01.099232', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.13422589004039764, 'timestamp': '2025-09-30 22:25:01.101892', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:01.169931', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.19399221241474152, 'timestamp': '2025-09-30 22:25:01.176323', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:01.232692', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.11654641479253769, 'timestamp': '2025-09-30 22:25:01.235359', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:01.297920', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.1306169480085373, 'timestamp': '2025-09-30 22:25:01.305641', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:01.369835', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.1432645171880722, 'timestamp': '2025-09-30 22:25:01.377769', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:01.447288', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.152825728058815, 'timestamp': '2025-09-30 22:25:01.457200', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:01.526146', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.1112510934472084, 'timestamp': '2025-09-30 22:25:01.529034', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:01.590322', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.14791721105575562, 'timestamp': '2025-09-30 22:25:01.592840', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:01.656836', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.18870560824871063, 'timestamp': '2025-09-30 22:25:01.659217', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:01.716250', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.14166992902755737, 'timestamp': '2025-09-30 22:25:01.722246', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:01.779161', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.21475234627723694, 'timestamp': '2025-09-30 22:25:01.790178', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:01.849364', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.10213916748762131, 'timestamp': '2025-09-30 22:25:01.855292', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:01.917550', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.17702840268611908, 'timestamp': '2025-09-30 22:25:01.922906', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:01.996309', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.24930864572525024, 'timestamp': '2025-09-30 22:25:02.003258', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:02.060935', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.095322385430336, 'timestamp': '2025-09-30 22:25:02.066916', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:02.143800', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.1301865130662918, 'timestamp': '2025-09-30 22:25:02.147382', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:02.204527', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.14995397627353668, 'timestamp': '2025-09-30 22:25:02.211308', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:02.275991', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.04796627536416054, 'timestamp': '2025-09-30 22:25:02.288277', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:02.347570', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.10352849960327148, 'timestamp': '2025-09-30 22:25:02.358674', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:02.417220', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.16751185059547424, 'timestamp': '2025-09-30 22:25:02.425812', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:02.486597', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.10993282496929169, 'timestamp': '2025-09-30 22:25:02.490196', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:02.569968', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.16688551008701324, 'timestamp': '2025-09-30 22:25:02.580459', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:02.647939', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.1462969183921814, 'timestamp': '2025-09-30 22:25:02.657411', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:02.720140', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.08867296576499939, 'timestamp': '2025-09-30 22:25:02.724939', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:02.794512', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.13164788484573364, 'timestamp': '2025-09-30 22:25:02.797036', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:02.856979', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.1706092357635498, 'timestamp': '2025-09-30 22:25:02.867103', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:02.928120', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.1963576376438141, 'timestamp': '2025-09-30 22:25:02.931178', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:02.995625', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.11122075468301773, 'timestamp': '2025-09-30 22:25:03.001753', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:03.061127', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.09708577394485474, 'timestamp': '2025-09-30 22:25:03.070642', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:03.144630', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.09227538108825684, 'timestamp': '2025-09-30 22:25:03.158125', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:03.217659', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.10922746360301971, 'timestamp': '2025-09-30 22:25:03.223745', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:03.286202', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.10305289179086685, 'timestamp': '2025-09-30 22:25:03.289262', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:03.347583', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.11598358303308487, 'timestamp': '2025-09-30 22:25:03.351021', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:25:03.416891', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.17081661522388458, 'timestamp': '2025-09-30 22:25:03.426339', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:03.486205', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.1735091358423233, 'timestamp': '2025-09-30 22:25:03.490314', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:03.550608', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.17730432748794556, 'timestamp': '2025-09-30 22:25:03.553786', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:03.613363', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.08119502663612366, 'timestamp': '2025-09-30 22:25:03.621515', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:03.688795', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.13230973482131958, 'timestamp': '2025-09-30 22:25:03.698269', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:03.754917', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.14205750823020935, 'timestamp': '2025-09-30 22:25:03.761548', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:03.831223', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.18362563848495483, 'timestamp': '2025-09-30 22:25:03.836262', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:03.917378', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.12317385524511337, 'timestamp': '2025-09-30 22:25:03.922311', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:03.984113', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.06062282621860504, 'timestamp': '2025-09-30 22:25:03.991275', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:04.049776', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.10492487251758575, 'timestamp': '2025-09-30 22:25:04.055522', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:04.120793', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.12664169073104858, 'timestamp': '2025-09-30 22:25:04.128187', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:04.187879', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.16257783770561218, 'timestamp': '2025-09-30 22:25:04.191950', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:04.258950', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.11699651181697845, 'timestamp': '2025-09-30 22:25:04.265361', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:04.328970', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.08728952705860138, 'timestamp': '2025-09-30 22:25:04.331849', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:04.390175', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.1585361361503601, 'timestamp': '2025-09-30 22:25:04.401568', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:04.476162', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.12712788581848145, 'timestamp': '2025-09-30 22:25:04.479043', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:04.553880', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.16016337275505066, 'timestamp': '2025-09-30 22:25:04.566781', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:04.629303', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.11014830321073532, 'timestamp': '2025-09-30 22:25:04.634173', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:04.695974', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.15302428603172302, 'timestamp': '2025-09-30 22:25:04.698738', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:04.755857', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.12369859963655472, 'timestamp': '2025-09-30 22:25:04.761982', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:04.819815', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.18910378217697144, 'timestamp': '2025-09-30 22:25:04.826297', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:04.888799', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.1115562692284584, 'timestamp': '2025-09-30 22:25:04.896833', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:04.957864', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.1645069122314453, 'timestamp': '2025-09-30 22:25:04.960997', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.025544', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.11566446721553802, 'timestamp': '2025-09-30 22:25:05.032092', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.095360', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.18296389281749725, 'timestamp': '2025-09-30 22:25:05.107385', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:05.164405', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.1290402114391327, 'timestamp': '2025-09-30 22:25:05.172917', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:05.265735', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.13522441685199738, 'timestamp': '2025-09-30 22:25:05.272337', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.351352', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.16545195877552032, 'timestamp': '2025-09-30 22:25:05.360500', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:05.419899', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.08567994832992554, 'timestamp': '2025-09-30 22:25:05.433400', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:05.492171', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.06895680725574493, 'timestamp': '2025-09-30 22:25:05.498014', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.562225', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.11332176625728607, 'timestamp': '2025-09-30 22:25:05.569835', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:05.637230', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.1895478516817093, 'timestamp': '2025-09-30 22:25:05.640933', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.700729', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.16924089193344116, 'timestamp': '2025-09-30 22:25:05.709495', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:05.776937', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.08458952605724335, 'timestamp': '2025-09-30 22:25:05.784151', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:05.842418', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.1359841674566269, 'timestamp': '2025-09-30 22:25:05.845519', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:05.914062', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.17113082110881805, 'timestamp': '2025-09-30 22:25:05.927152', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:05.994621', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.06340070068836212, 'timestamp': '2025-09-30 22:25:06.005774', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.076062', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.1315697729587555, 'timestamp': '2025-09-30 22:25:06.079530', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.141203', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.1579071581363678, 'timestamp': '2025-09-30 22:25:06.145564', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.205201', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.14007773995399475, 'timestamp': '2025-09-30 22:25:06.208159', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.273965', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.15321841835975647, 'timestamp': '2025-09-30 22:25:06.280009', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.344232', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.1366366744041443, 'timestamp': '2025-09-30 22:25:06.353848', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.411333', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.1378040611743927, 'timestamp': '2025-09-30 22:25:06.416287', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.473938', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.10226781666278839, 'timestamp': '2025-09-30 22:25:06.482999', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:06.549060', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.09545569866895676, 'timestamp': '2025-09-30 22:25:06.559177', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.619841', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.07606326788663864, 'timestamp': '2025-09-30 22:25:06.630174', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:06.688136', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.08457597345113754, 'timestamp': '2025-09-30 22:25:06.695018', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:06.758687', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.11661138385534286, 'timestamp': '2025-09-30 22:25:06.766986', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:06.829830', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.12062583863735199, 'timestamp': '2025-09-30 22:25:06.836256', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:06.904263', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.12280440330505371, 'timestamp': '2025-09-30 22:25:06.907287', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:06.965502', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.1943390667438507, 'timestamp': '2025-09-30 22:25:06.969192', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.035702', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.10737885534763336, 'timestamp': '2025-09-30 22:25:07.039419', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.098112', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.08759450912475586, 'timestamp': '2025-09-30 22:25:07.105547', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:07.164090', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.08319293707609177, 'timestamp': '2025-09-30 22:25:07.174443', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:07.233346', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.1970929652452469, 'timestamp': '2025-09-30 22:25:07.236369', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.302709', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.11413067579269409, 'timestamp': '2025-09-30 22:25:07.307324', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:25:07.367698', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.17139610648155212, 'timestamp': '2025-09-30 22:25:07.374727', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:07.432853', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.10955806076526642, 'timestamp': '2025-09-30 22:25:07.442793', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:07.505195', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.10448566824197769, 'timestamp': '2025-09-30 22:25:07.508830', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.579861', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.13846158981323242, 'timestamp': '2025-09-30 22:25:07.583616', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.646272', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.11988504976034164, 'timestamp': '2025-09-30 22:25:07.652506', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.715284', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.12055838108062744, 'timestamp': '2025-09-30 22:25:07.725278', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:07.790975', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.13956616818904877, 'timestamp': '2025-09-30 22:25:07.794793', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:07.867428', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.150490865111351, 'timestamp': '2025-09-30 22:25:07.871134', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:07.928219', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.114422507584095, 'timestamp': '2025-09-30 22:25:07.941152', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:07.998877', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.1635155975818634, 'timestamp': '2025-09-30 22:25:08.008909', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:08.076048', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.09665044397115707, 'timestamp': '2025-09-30 22:25:08.081108', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:08.167043', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.14100389182567596, 'timestamp': '2025-09-30 22:25:08.170891', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:08.247814', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.08734428137540817, 'timestamp': '2025-09-30 22:25:08.254358', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:08.322466', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.16523125767707825, 'timestamp': '2025-09-30 22:25:08.325758', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:08.383532', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.23153667151927948, 'timestamp': '2025-09-30 22:25:08.386439', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:08.445870', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.20647142827510834, 'timestamp': '2025-09-30 22:25:08.448627', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:08.515824', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.1449265480041504, 'timestamp': '2025-09-30 22:25:08.522396', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:08.587894', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.1726698875427246, 'timestamp': '2025-09-30 22:25:08.591108', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:08.649414', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.1749296486377716, 'timestamp': '2025-09-30 22:25:08.662463', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:08.728771', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.21798883378505707, 'timestamp': '2025-09-30 22:25:08.732166', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:08.807420', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.15269675850868225, 'timestamp': '2025-09-30 22:25:08.813759', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:08.870792', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.13402527570724487, 'timestamp': '2025-09-30 22:25:08.874564', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:08.936154', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.141170933842659, 'timestamp': '2025-09-30 22:25:08.939369', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.010167', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.09342106431722641, 'timestamp': '2025-09-30 22:25:09.013200', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.071470', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.09673458337783813, 'timestamp': '2025-09-30 22:25:09.079459', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:09.137553', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.19380046427249908, 'timestamp': '2025-09-30 22:25:09.142017', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-30 22:25:09.217055', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.2936561405658722, 'timestamp': '2025-09-30 22:25:09.221460', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:09.284960', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.10559366643428802, 'timestamp': '2025-09-30 22:25:09.294499', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.351369', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.19530761241912842, 'timestamp': '2025-09-30 22:25:09.365103', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:09.445185', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.21954886615276337, 'timestamp': '2025-09-30 22:25:09.449556', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:09.510354', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.12129905074834824, 'timestamp': '2025-09-30 22:25:09.515320', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:09.580222', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.1262504607439041, 'timestamp': '2025-09-30 22:25:09.583978', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.646947', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.0917869284749031, 'timestamp': '2025-09-30 22:25:09.653486', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.715852', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.18780697882175446, 'timestamp': '2025-09-30 22:25:09.718910', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:09.777253', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.08806099742650986, 'timestamp': '2025-09-30 22:25:09.785524', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:09.849612', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.1680828481912613, 'timestamp': '2025-09-30 22:25:09.857056', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:09.941861', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.07349655777215958, 'timestamp': '2025-09-30 22:25:09.948188', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.005462', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.17743101716041565, 'timestamp': '2025-09-30 22:25:10.008458', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.065977', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.11690578609704971, 'timestamp': '2025-09-30 22:25:10.075427', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:10.142434', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.12197115272283554, 'timestamp': '2025-09-30 22:25:10.151257', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:10.217877', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.10555095970630646, 'timestamp': '2025-09-30 22:25:10.233362', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.291932', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.10408101230859756, 'timestamp': '2025-09-30 22:25:10.294802', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.355100', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.10099659860134125, 'timestamp': '2025-09-30 22:25:10.363114', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.423573', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.1309550255537033, 'timestamp': '2025-09-30 22:25:10.428349', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:10.487109', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.1502373218536377, 'timestamp': '2025-09-30 22:25:10.501085', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.566332', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.11945680528879166, 'timestamp': '2025-09-30 22:25:10.574102', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.633571', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.1830117404460907, 'timestamp': '2025-09-30 22:25:10.643238', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.716056', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.1749587059020996, 'timestamp': '2025-09-30 22:25:10.719197', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.777730', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.20041726529598236, 'timestamp': '2025-09-30 22:25:10.784074', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:10.854003', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.14768248796463013, 'timestamp': '2025-09-30 22:25:10.856995', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:10.919712', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.21789878606796265, 'timestamp': '2025-09-30 22:25:10.923520', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:10.997062', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.1259617656469345, 'timestamp': '2025-09-30 22:25:10.999930', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.061579', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.07170792669057846, 'timestamp': '2025-09-30 22:25:11.068986', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.128574', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.13731752336025238, 'timestamp': '2025-09-30 22:25:11.138034', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.209423', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.1378301978111267, 'timestamp': '2025-09-30 22:25:11.212290', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.270457', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.18802520632743835, 'timestamp': '2025-09-30 22:25:11.273427', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.338049', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.03677602484822273, 'timestamp': '2025-09-30 22:25:11.344882', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.406029', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.10685774683952332, 'timestamp': '2025-09-30 22:25:11.409947', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:11.473969', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.201949805021286, 'timestamp': '2025-09-30 22:25:11.477075', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.537636', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.16902513802051544, 'timestamp': '2025-09-30 22:25:11.540116', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:11.611551', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.09756667912006378, 'timestamp': '2025-09-30 22:25:11.619132', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.676267', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.1588275134563446, 'timestamp': '2025-09-30 22:25:11.683741', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.747804', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.11943294107913971, 'timestamp': '2025-09-30 22:25:11.750653', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.808461', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.0610513761639595, 'timestamp': '2025-09-30 22:25:11.812619', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:11.876412', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.12967544794082642, 'timestamp': '2025-09-30 22:25:11.893352', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:11.954189', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.17475692927837372, 'timestamp': '2025-09-30 22:25:11.958180', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:12.017636', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.14681023359298706, 'timestamp': '2025-09-30 22:25:12.020670', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:12.086436', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.1045466810464859, 'timestamp': '2025-09-30 22:25:12.089538', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:12.148790', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.16659285128116608, 'timestamp': '2025-09-30 22:25:12.155813', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:12.220382', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.12221802026033401, 'timestamp': '2025-09-30 22:25:12.225399', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.285327', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.11012990772724152, 'timestamp': '2025-09-30 22:25:12.289079', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:12.354979', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.15030381083488464, 'timestamp': '2025-09-30 22:25:12.360740', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.420773', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.12447003275156021, 'timestamp': '2025-09-30 22:25:12.429900', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:12.487148', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.12832477688789368, 'timestamp': '2025-09-30 22:25:12.491026', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.550192', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.17382226884365082, 'timestamp': '2025-09-30 22:25:12.554642', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.612184', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.21212035417556763, 'timestamp': '2025-09-30 22:25:12.616926', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.675114', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.08683811128139496, 'timestamp': '2025-09-30 22:25:12.689133', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.751824', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.15313297510147095, 'timestamp': '2025-09-30 22:25:12.755790', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:12.814686', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.15224814414978027, 'timestamp': '2025-09-30 22:25:12.817907', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.874734', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.16171139478683472, 'timestamp': '2025-09-30 22:25:12.878298', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:12.936045', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.07648710906505585, 'timestamp': '2025-09-30 22:25:12.951262', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.008737', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.12906375527381897, 'timestamp': '2025-09-30 22:25:13.016475', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:13.074592', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.1071244478225708, 'timestamp': '2025-09-30 22:25:13.077951', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:13.143973', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.1582018882036209, 'timestamp': '2025-09-30 22:25:13.148010', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.207484', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.183768168091774, 'timestamp': '2025-09-30 22:25:13.213884', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.277320', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.08182499557733536, 'timestamp': '2025-09-30 22:25:13.280526', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.342090', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.11745879054069519, 'timestamp': '2025-09-30 22:25:13.352862', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.414928', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.12795716524124146, 'timestamp': '2025-09-30 22:25:13.427047', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:13.496502', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.10578696429729462, 'timestamp': '2025-09-30 22:25:13.504418', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.569271', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.23255524039268494, 'timestamp': '2025-09-30 22:25:13.572187', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:13.631440', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.16396568715572357, 'timestamp': '2025-09-30 22:25:13.641976', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:13.700155', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.18883757293224335, 'timestamp': '2025-09-30 22:25:13.715483', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:13.783233', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.12850256264209747, 'timestamp': '2025-09-30 22:25:13.796109', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:13.865176', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.12698322534561157, 'timestamp': '2025-09-30 22:25:13.868412', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:13.924874', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.24213992059230804, 'timestamp': '2025-09-30 22:25:13.935980', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.000344', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.13627393543720245, 'timestamp': '2025-09-30 22:25:14.004442', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:14.061627', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.15710848569869995, 'timestamp': '2025-09-30 22:25:14.069190', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.127688', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.10148915648460388, 'timestamp': '2025-09-30 22:25:14.131845', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:14.189383', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.07936745882034302, 'timestamp': '2025-09-30 22:25:14.193344', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:14.251705', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.11676868796348572, 'timestamp': '2025-09-30 22:25:14.255083', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:14.318152', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.10903965681791306, 'timestamp': '2025-09-30 22:25:14.329391', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.392414', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.12501993775367737, 'timestamp': '2025-09-30 22:25:14.394776', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:14.462441', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.1378050297498703, 'timestamp': '2025-09-30 22:25:14.465754', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:14.539701', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.11245381087064743, 'timestamp': '2025-09-30 22:25:14.543161', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.600917', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.1377713978290558, 'timestamp': '2025-09-30 22:25:14.607317', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.667316', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.09158123284578323, 'timestamp': '2025-09-30 22:25:14.670775', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.728960', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.11207954585552216, 'timestamp': '2025-09-30 22:25:14.732700', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:14.796360', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.16167764365673065, 'timestamp': '2025-09-30 22:25:14.799191', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:14.859052', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.06251831352710724, 'timestamp': '2025-09-30 22:25:14.866875', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:14.924165', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.17453111708164215, 'timestamp': '2025-09-30 22:25:14.936038', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:15.015453', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.09849978983402252, 'timestamp': '2025-09-30 22:25:15.017845', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:15.078847', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.09113606810569763, 'timestamp': '2025-09-30 22:25:15.082572', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:15.169066', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.11306162178516388, 'timestamp': '2025-09-30 22:25:15.186872', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:15.264802', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.23623941838741302, 'timestamp': '2025-09-30 22:25:15.275495', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:15.354333', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.07699593156576157, 'timestamp': '2025-09-30 22:25:15.368016', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:15.446526', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.13712438941001892, 'timestamp': '2025-09-30 22:25:15.449175', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:15.531578', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.11862089484930038, 'timestamp': '2025-09-30 22:25:15.539386', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:15.626111', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.1084468811750412, 'timestamp': '2025-09-30 22:25:15.628872', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:15.693051', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.06905703991651535, 'timestamp': '2025-09-30 22:25:15.697351', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:15.778909', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.09160113334655762, 'timestamp': '2025-09-30 22:25:15.781741', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:15.869937', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.2270745038986206, 'timestamp': '2025-09-30 22:25:15.880896', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:15.952100', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.12812918424606323, 'timestamp': '2025-09-30 22:25:15.958043', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:16.036209', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.15479770302772522, 'timestamp': '2025-09-30 22:25:16.039460', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:16.106805', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.1815703809261322, 'timestamp': '2025-09-30 22:25:16.110636', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:16.184090', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.1272427886724472, 'timestamp': '2025-09-30 22:25:16.198809', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:16.274304', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.0752001628279686, 'timestamp': '2025-09-30 22:25:16.278128', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:16.363481', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.06347325444221497, 'timestamp': '2025-09-30 22:25:16.366383', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:16.437942', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.14781931042671204, 'timestamp': '2025-09-30 22:25:16.446773', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:16.507579', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.07657977938652039, 'timestamp': '2025-09-30 22:25:16.513910', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:16.572600', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.16751989722251892, 'timestamp': '2025-09-30 22:25:16.583340', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:16.648200', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.12296394258737564, 'timestamp': '2025-09-30 22:25:16.658619', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:16.721324', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.08549399673938751, 'timestamp': '2025-09-30 22:25:16.723827', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:16.789820', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.07072166353464127, 'timestamp': '2025-09-30 22:25:16.796894', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:16.866250', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.19549481570720673, 'timestamp': '2025-09-30 22:25:16.876642', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:16.933653', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.14359554648399353, 'timestamp': '2025-09-30 22:25:16.936586', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:16.993976', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.0862506628036499, 'timestamp': '2025-09-30 22:25:16.996570', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.055069', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.18175330758094788, 'timestamp': '2025-09-30 22:25:17.061709', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.119306', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.20153826475143433, 'timestamp': '2025-09-30 22:25:17.122649', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:17.188074', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.10978484898805618, 'timestamp': '2025-09-30 22:25:17.191361', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.260474', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.11428748071193695, 'timestamp': '2025-09-30 22:25:17.264747', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:17.322396', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.07542899250984192, 'timestamp': '2025-09-30 22:25:17.329510', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.386222', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.15076576173305511, 'timestamp': '2025-09-30 22:25:17.390229', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.450853', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.11653530597686768, 'timestamp': '2025-09-30 22:25:17.454153', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.522452', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.2131069004535675, 'timestamp': '2025-09-30 22:25:17.535505', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:17.595709', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.07858850061893463, 'timestamp': '2025-09-30 22:25:17.601794', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:17.668299', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.15890486538410187, 'timestamp': '2025-09-30 22:25:17.671272', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.729436', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.21890364587306976, 'timestamp': '2025-09-30 22:25:17.731799', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:17.793155', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.2013648897409439, 'timestamp': '2025-09-30 22:25:17.796280', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:17.853661', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.18793921172618866, 'timestamp': '2025-09-30 22:25:17.861138', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:17.918575', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.12730465829372406, 'timestamp': '2025-09-30 22:25:17.921897', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:17.997805', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.0661558136343956, 'timestamp': '2025-09-30 22:25:18.000585', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:18.064258', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.10183659195899963, 'timestamp': '2025-09-30 22:25:18.067749', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:18.133169', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.15976330637931824, 'timestamp': '2025-09-30 22:25:18.145557', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:18.205083', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.08622033149003983, 'timestamp': '2025-09-30 22:25:18.208417', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:18.265751', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.10935992002487183, 'timestamp': '2025-09-30 22:25:18.269020', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:18.327329', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.14939378201961517, 'timestamp': '2025-09-30 22:25:18.331028', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:18.395424', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.058003734797239304, 'timestamp': '2025-09-30 22:25:18.407584', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:18.482940', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.11574687063694, 'timestamp': '2025-09-30 22:25:18.487381', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:18.568175', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.14353890717029572, 'timestamp': '2025-09-30 22:25:18.575469', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:18.638911', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.16438299417495728, 'timestamp': '2025-09-30 22:25:18.643370', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:18.703461', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.10149182379245758, 'timestamp': '2025-09-30 22:25:18.709655', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:18.770787', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.03710238263010979, 'timestamp': '2025-09-30 22:25:18.774205', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:18.839290', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.2032909393310547, 'timestamp': '2025-09-30 22:25:18.850824', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:18.908366', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.12090309709310532, 'timestamp': '2025-09-30 22:25:18.912803', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:18.981334', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.23183953762054443, 'timestamp': '2025-09-30 22:25:18.989803', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:19.055363', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.12811081111431122, 'timestamp': '2025-09-30 22:25:19.058261', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.124561', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.20922382175922394, 'timestamp': '2025-09-30 22:25:19.127726', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:19.186731', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.1167960911989212, 'timestamp': '2025-09-30 22:25:19.189843', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.250103', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.11113232374191284, 'timestamp': '2025-09-30 22:25:19.256183', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:19.317072', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.09719141572713852, 'timestamp': '2025-09-30 22:25:19.322280', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:19.390775', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.0822238177061081, 'timestamp': '2025-09-30 22:25:19.402564', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.460515', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.12373153120279312, 'timestamp': '2025-09-30 22:25:19.463522', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.522311', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.08420039713382721, 'timestamp': '2025-09-30 22:25:19.529526', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:19.588363', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.13516293466091156, 'timestamp': '2025-09-30 22:25:19.591591', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:19.650895', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.14309361577033997, 'timestamp': '2025-09-30 22:25:19.655000', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:19.725259', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.09140913933515549, 'timestamp': '2025-09-30 22:25:19.728479', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:19.793733', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.10586749762296677, 'timestamp': '2025-09-30 22:25:19.805680', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.883347', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.17110249400138855, 'timestamp': '2025-09-30 22:25:19.887085', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:19.956572', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.1355256885290146, 'timestamp': '2025-09-30 22:25:19.958993', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:20.025236', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.15823225677013397, 'timestamp': '2025-09-30 22:25:20.027871', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:20.100802', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.052242595702409744, 'timestamp': '2025-09-30 22:25:20.108308', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:20.166363', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.1504686176776886, 'timestamp': '2025-09-30 22:25:20.168957', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:20.229502', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.14105546474456787, 'timestamp': '2025-09-30 22:25:20.233258', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:25:35.108611', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 14055.025394093396, 'timestamp': '2025-09-30 22:25:35.131533', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:35.189525', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.11553064733743668, 'timestamp': '2025-09-30 22:25:35.192948', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:35.250769', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.20377850532531738, 'timestamp': '2025-09-30 22:25:35.258885', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:35.315785', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.14456409215927124, 'timestamp': '2025-09-30 22:25:35.318969', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:35.378577', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.10121124982833862, 'timestamp': '2025-09-30 22:25:35.381073', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:35.440728', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.15986448526382446, 'timestamp': '2025-09-30 22:25:35.443201', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:35.502998', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.11792103946208954, 'timestamp': '2025-09-30 22:25:35.509136', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:35.566451', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.16927216947078705, 'timestamp': '2025-09-30 22:25:35.572286', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:35.629326', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.13053587079048157, 'timestamp': '2025-09-30 22:25:35.638762', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:35.698803', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.07741700112819672, 'timestamp': '2025-09-30 22:25:35.701687', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:35.762668', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.1329183727502823, 'timestamp': '2025-09-30 22:25:35.773094', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:35.830364', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.13979052007198334, 'timestamp': '2025-09-30 22:25:35.834673', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:35.893263', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.10242436826229095, 'timestamp': '2025-09-30 22:25:35.906423', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:35.963216', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.13819800317287445, 'timestamp': '2025-09-30 22:25:35.967940', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:36.027139', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.21818740665912628, 'timestamp': '2025-09-30 22:25:36.033339', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:36.104080', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.09388027340173721, 'timestamp': '2025-09-30 22:25:36.110098', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:36.170466', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.09969917684793472, 'timestamp': '2025-09-30 22:25:36.174609', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:36.231716', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.1204608827829361, 'timestamp': '2025-09-30 22:25:36.234966', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:36.296549', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.07096360623836517, 'timestamp': '2025-09-30 22:25:36.302947', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:36.375804', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.19689688086509705, 'timestamp': '2025-09-30 22:25:36.382118', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:36.439391', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.12711496651172638, 'timestamp': '2025-09-30 22:25:36.446411', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:36.506418', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.11331326514482498, 'timestamp': '2025-09-30 22:25:36.509395', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:36.570124', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.10694108158349991, 'timestamp': '2025-09-30 22:25:36.583178', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:36.640662', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.2401851862668991, 'timestamp': '2025-09-30 22:25:36.643575', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:36.701479', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.15079377591609955, 'timestamp': '2025-09-30 22:25:36.713164', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:36.772237', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.16230447590351105, 'timestamp': '2025-09-30 22:25:36.781875', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:36.842536', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.23433588445186615, 'timestamp': '2025-09-30 22:25:36.850467', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:36.909175', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.10705507546663284, 'timestamp': '2025-09-30 22:25:36.912037', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:36.976245', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.16255725920200348, 'timestamp': '2025-09-30 22:25:36.986659', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:37.047162', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.11679855734109879, 'timestamp': '2025-09-30 22:25:37.050633', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:37.109238', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.16032764315605164, 'timestamp': '2025-09-30 22:25:37.119401', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:37.178794', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.11988049000501633, 'timestamp': '2025-09-30 22:25:37.187851', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:37.252608', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.2273387461900711, 'timestamp': '2025-09-30 22:25:37.257108', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:37.325113', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.11932222545146942, 'timestamp': '2025-09-30 22:25:37.329792', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:37.388677', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.11535491794347763, 'timestamp': '2025-09-30 22:25:37.399352', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:37.456124', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.085465207695961, 'timestamp': '2025-09-30 22:25:37.467645', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:37.526113', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.17573364078998566, 'timestamp': '2025-09-30 22:25:37.528974', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:37.592681', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.1515510380268097, 'timestamp': '2025-09-30 22:25:37.596434', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:37.656534', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.19197717308998108, 'timestamp': '2025-09-30 22:25:37.664282', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:37.728272', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.08641952276229858, 'timestamp': '2025-09-30 22:25:37.732876', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:37.791693', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.16666512191295624, 'timestamp': '2025-09-30 22:25:37.802743', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:37.865323', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.08315234631299973, 'timestamp': '2025-09-30 22:25:37.871753', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:37.935582', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.08268915861845016, 'timestamp': '2025-09-30 22:25:37.943651', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.007841', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.152423694729805, 'timestamp': '2025-09-30 22:25:38.011638', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.070973', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.1179155707359314, 'timestamp': '2025-09-30 22:25:38.080530', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:38.151721', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.12573881447315216, 'timestamp': '2025-09-30 22:25:38.162053', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:38.221576', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.13671942055225372, 'timestamp': '2025-09-30 22:25:38.232176', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:38.299002', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.08871834725141525, 'timestamp': '2025-09-30 22:25:38.307798', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.374237', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.18408502638339996, 'timestamp': '2025-09-30 22:25:38.376652', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:38.436589', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.12849655747413635, 'timestamp': '2025-09-30 22:25:38.440568', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.504795', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.2185138463973999, 'timestamp': '2025-09-30 22:25:38.512238', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:38.577794', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.2803049087524414, 'timestamp': '2025-09-30 22:25:38.586425', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:38.661469', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.10008949786424637, 'timestamp': '2025-09-30 22:25:38.673558', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:38.738429', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.12718705832958221, 'timestamp': '2025-09-30 22:25:38.751469', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.817971', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.19634418189525604, 'timestamp': '2025-09-30 22:25:38.825496', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:38.901910', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.10947397351264954, 'timestamp': '2025-09-30 22:25:38.904871', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:38.969896', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.14282330870628357, 'timestamp': '2025-09-30 22:25:38.973411', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.036349', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.20019252598285675, 'timestamp': '2025-09-30 22:25:39.041215', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.106570', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.1104278564453125, 'timestamp': '2025-09-30 22:25:39.119185', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.179846', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.1037755161523819, 'timestamp': '2025-09-30 22:25:39.185117', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.246725', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.06276458501815796, 'timestamp': '2025-09-30 22:25:39.257731', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.316093', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.2230357974767685, 'timestamp': '2025-09-30 22:25:39.325329', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.392818', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.0599801242351532, 'timestamp': '2025-09-30 22:25:39.399792', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.459268', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.0757334977388382, 'timestamp': '2025-09-30 22:25:39.464990', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.531699', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.08281809091567993, 'timestamp': '2025-09-30 22:25:39.536747', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.598990', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.15297287702560425, 'timestamp': '2025-09-30 22:25:39.603631', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.663776', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.11917408555746078, 'timestamp': '2025-09-30 22:25:39.672338', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:39.740975', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.1110101044178009, 'timestamp': '2025-09-30 22:25:39.751505', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.818304', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.14780329167842865, 'timestamp': '2025-09-30 22:25:39.829436', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:39.891094', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.24629844725131989, 'timestamp': '2025-09-30 22:25:39.896531', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:39.959290', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.16546103358268738, 'timestamp': '2025-09-30 22:25:39.966643', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:40.027680', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.14596787095069885, 'timestamp': '2025-09-30 22:25:40.030283', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:40.099333', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.11180315911769867, 'timestamp': '2025-09-30 22:25:40.102206', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.167738', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.07467486709356308, 'timestamp': '2025-09-30 22:25:40.175885', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:40.241964', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.23651711642742157, 'timestamp': '2025-09-30 22:25:40.258733', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.318153', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.2175532430410385, 'timestamp': '2025-09-30 22:25:40.326844', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.386257', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.1752052754163742, 'timestamp': '2025-09-30 22:25:40.392004', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:40.452148', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.10223408043384552, 'timestamp': '2025-09-30 22:25:40.455832', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:40.514557', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.08313766121864319, 'timestamp': '2025-09-30 22:25:40.520713', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.579106', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.08406894654035568, 'timestamp': '2025-09-30 22:25:40.586170', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:40.646449', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.1308240294456482, 'timestamp': '2025-09-30 22:25:40.652198', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:40.710848', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.0955180898308754, 'timestamp': '2025-09-30 22:25:40.713558', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.771553', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.0759604424238205, 'timestamp': '2025-09-30 22:25:40.778741', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:40.843140', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.18272657692432404, 'timestamp': '2025-09-30 22:25:40.847186', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:40.910749', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.04559240862727165, 'timestamp': '2025-09-30 22:25:40.913519', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:40.972546', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.18198338150978088, 'timestamp': '2025-09-30 22:25:40.977365', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.035921', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.13245238363742828, 'timestamp': '2025-09-30 22:25:41.043152', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.101636', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.09037083387374878, 'timestamp': '2025-09-30 22:25:41.104084', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.163160', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.10981913655996323, 'timestamp': '2025-09-30 22:25:41.166980', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.228218', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.10587704181671143, 'timestamp': '2025-09-30 22:25:41.231394', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.290807', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.08372698724269867, 'timestamp': '2025-09-30 22:25:41.297815', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.355435', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.16900867223739624, 'timestamp': '2025-09-30 22:25:41.359304', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.418834', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.13806356489658356, 'timestamp': '2025-09-30 22:25:41.422397', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.481340', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.0960509330034256, 'timestamp': '2025-09-30 22:25:41.491351', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.550456', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.15170976519584656, 'timestamp': '2025-09-30 22:25:41.557348', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:41.616806', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.16645187139511108, 'timestamp': '2025-09-30 22:25:41.621875', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.690958', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.16674186289310455, 'timestamp': '2025-09-30 22:25:41.698136', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:41.761930', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.11726510524749756, 'timestamp': '2025-09-30 22:25:41.765418', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:41.826968', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.09795007854700089, 'timestamp': '2025-09-30 22:25:41.834506', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:41.895207', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.17614494264125824, 'timestamp': '2025-09-30 22:25:41.897907', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:41.957076', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.11398757249116898, 'timestamp': '2025-09-30 22:25:41.960731', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:42.024000', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.13211926817893982, 'timestamp': '2025-09-30 22:25:42.027765', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.095758', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.27915287017822266, 'timestamp': '2025-09-30 22:25:42.102434', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:42.166727', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.15046752989292145, 'timestamp': '2025-09-30 22:25:42.176175', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:42.241756', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.11093761771917343, 'timestamp': '2025-09-30 22:25:42.244959', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:42.305598', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.10143742710351944, 'timestamp': '2025-09-30 22:25:42.308562', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.376563', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.13929100334644318, 'timestamp': '2025-09-30 22:25:42.383340', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.440754', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.19800730049610138, 'timestamp': '2025-09-30 22:25:42.443273', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.515711', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.16439495980739594, 'timestamp': '2025-09-30 22:25:42.521029', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.580229', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.1507301777601242, 'timestamp': '2025-09-30 22:25:42.585268', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:42.645698', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.0891074687242508, 'timestamp': '2025-09-30 22:25:42.653060', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:42.716523', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.13766391575336456, 'timestamp': '2025-09-30 22:25:42.726825', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.786872', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.13334234058856964, 'timestamp': '2025-09-30 22:25:42.790890', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:42.858258', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.10962327569723129, 'timestamp': '2025-09-30 22:25:42.860649', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:42.920432', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.15681059658527374, 'timestamp': '2025-09-30 22:25:42.930783', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:42.993448', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.12125634402036667, 'timestamp': '2025-09-30 22:25:42.998058', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:43.074350', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.129311203956604, 'timestamp': '2025-09-30 22:25:43.076921', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:43.149521', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.07709681242704391, 'timestamp': '2025-09-30 22:25:43.151729', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.216781', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.1393873691558838, 'timestamp': '2025-09-30 22:25:43.223213', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.284164', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.16082540154457092, 'timestamp': '2025-09-30 22:25:43.288041', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.361967', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.2753678560256958, 'timestamp': '2025-09-30 22:25:43.365225', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:43.426992', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.1316419094800949, 'timestamp': '2025-09-30 22:25:43.432296', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.497973', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.19561819732189178, 'timestamp': '2025-09-30 22:25:43.505681', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.565753', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.09460461884737015, 'timestamp': '2025-09-30 22:25:43.568758', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.648621', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.15387946367263794, 'timestamp': '2025-09-30 22:25:43.655832', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:43.721529', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.28341156244277954, 'timestamp': '2025-09-30 22:25:43.727270', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.790846', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.07918227463960648, 'timestamp': '2025-09-30 22:25:43.800608', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:43.882018', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.03277338668704033, 'timestamp': '2025-09-30 22:25:43.885027', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:43.946805', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.15183468163013458, 'timestamp': '2025-09-30 22:25:43.952756', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:44.042265', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.14557594060897827, 'timestamp': '2025-09-30 22:25:44.045617', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:44.116220', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.1460997313261032, 'timestamp': '2025-09-30 22:25:44.126671', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:44.192551', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.1586715131998062, 'timestamp': '2025-09-30 22:25:44.200073', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:44.258745', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.1880451738834381, 'timestamp': '2025-09-30 22:25:44.262862', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:44.323203', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.1799539476633072, 'timestamp': '2025-09-30 22:25:44.332494', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:44.399112', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.11833609640598297, 'timestamp': '2025-09-30 22:25:44.408064', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:44.466792', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.11225005239248276, 'timestamp': '2025-09-30 22:25:44.469240', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:44.531039', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.08374741673469543, 'timestamp': '2025-09-30 22:25:44.537867', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:44.601083', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.1008494645357132, 'timestamp': '2025-09-30 22:25:44.603698', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:44.665673', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.11130954325199127, 'timestamp': '2025-09-30 22:25:44.672284', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:44.729809', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.1302066147327423, 'timestamp': '2025-09-30 22:25:44.737751', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:44.796643', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.08971678465604782, 'timestamp': '2025-09-30 22:25:44.802862', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:44.871695', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.15669067203998566, 'timestamp': '2025-09-30 22:25:44.874741', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:44.936614', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.06618435680866241, 'timestamp': '2025-09-30 22:25:44.942742', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:45.013965', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.07857874780893326, 'timestamp': '2025-09-30 22:25:45.022317', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:45.079896', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.25609907507896423, 'timestamp': '2025-09-30 22:25:45.086926', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:45.150005', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.13840796053409576, 'timestamp': '2025-09-30 22:25:45.152724', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:45.214315', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.13191170990467072, 'timestamp': '2025-09-30 22:25:45.220988', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-09-30 22:25:45.848485', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:45.923650', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.20002202689647675, 'timestamp': '2025-09-30 22:25:45.928749', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:45.995590', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.14301414787769318, 'timestamp': '2025-09-30 22:25:46.001772', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.077807', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.08133836090564728, 'timestamp': '2025-09-30 22:25:46.083946', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.143244', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.0976535826921463, 'timestamp': '2025-09-30 22:25:46.149946', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.215543', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.24238118529319763, 'timestamp': '2025-09-30 22:25:46.218008', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.275400', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.06252554059028625, 'timestamp': '2025-09-30 22:25:46.281917', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:46.352633', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.07609161734580994, 'timestamp': '2025-09-30 22:25:46.356263', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:46.415584', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.13646894693374634, 'timestamp': '2025-09-30 22:25:46.431274', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:46.490056', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.15437297523021698, 'timestamp': '2025-09-30 22:25:46.492443', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:46.550928', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.10052630305290222, 'timestamp': '2025-09-30 22:25:46.560875', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:46.618951', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09594248980283737, 'timestamp': '2025-09-30 22:25:46.621651', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.700302', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.05124610662460327, 'timestamp': '2025-09-30 22:25:46.707348', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.769044', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.13915470242500305, 'timestamp': '2025-09-30 22:25:46.772557', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:46.834147', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.13845859467983246, 'timestamp': '2025-09-30 22:25:46.838148', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:46.914149', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.21837760508060455, 'timestamp': '2025-09-30 22:25:46.916650', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:46.978772', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.1657567322254181, 'timestamp': '2025-09-30 22:25:46.985601', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.043673', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.12104218453168869, 'timestamp': '2025-09-30 22:25:47.052122', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.112570', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.06833049654960632, 'timestamp': '2025-09-30 22:25:47.115445', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:47.176564', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.12725013494491577, 'timestamp': '2025-09-30 22:25:47.178811', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.237067', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.19265809655189514, 'timestamp': '2025-09-30 22:25:47.247269', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.323943', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.1206468865275383, 'timestamp': '2025-09-30 22:25:47.330103', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:47.397526', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.10067316889762878, 'timestamp': '2025-09-30 22:25:47.403662', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:47.466761', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.12387792766094208, 'timestamp': '2025-09-30 22:25:47.469581', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.527614', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.12367181479930878, 'timestamp': '2025-09-30 22:25:47.539088', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:47.611534', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.1037127897143364, 'timestamp': '2025-09-30 22:25:47.616053', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:47.676117', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.18022102117538452, 'timestamp': '2025-09-30 22:25:47.684671', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:47.747841', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.08081786334514618, 'timestamp': '2025-09-30 22:25:47.751563', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:47.822231', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.14578968286514282, 'timestamp': '2025-09-30 22:25:47.829520', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:47.889519', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.13757745921611786, 'timestamp': '2025-09-30 22:25:47.893242', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:47.952844', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.15275509655475616, 'timestamp': '2025-09-30 22:25:47.956758', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:48.022200', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.09685015678405762, 'timestamp': '2025-09-30 22:25:48.030847', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:48.090311', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.18705399334430695, 'timestamp': '2025-09-30 22:25:48.096948', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:48.162429', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.11872702091932297, 'timestamp': '2025-09-30 22:25:48.165247', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:48.231375', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.08708178251981735, 'timestamp': '2025-09-30 22:25:48.239387', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:48.303976', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.15986627340316772, 'timestamp': '2025-09-30 22:25:48.308099', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:48.383544', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.18228693306446075, 'timestamp': '2025-09-30 22:25:48.398648', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:48.472035', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.14084558188915253, 'timestamp': '2025-09-30 22:25:48.476646', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:48.534900', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.14285258948802948, 'timestamp': '2025-09-30 22:25:48.548597', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:48.608443', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.1355188488960266, 'timestamp': '2025-09-30 22:25:48.612473', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:48.679726', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.1346617043018341, 'timestamp': '2025-09-30 22:25:48.686462', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:48.748171', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.1189921423792839, 'timestamp': '2025-09-30 22:25:48.755314', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:48.813795', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.16921421885490417, 'timestamp': '2025-09-30 22:25:48.821766', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:48.890326', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.17331631481647491, 'timestamp': '2025-09-30 22:25:48.893358', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:48.961890', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.11309341341257095, 'timestamp': '2025-09-30 22:25:48.968417', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:49.029062', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.12187231332063675, 'timestamp': '2025-09-30 22:25:49.031289', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:49.089542', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.11888867616653442, 'timestamp': '2025-09-30 22:25:49.095788', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:49.170718', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.19520840048789978, 'timestamp': '2025-09-30 22:25:49.174904', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:49.250183', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.11171585321426392, 'timestamp': '2025-09-30 22:25:49.276304', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:49.339940', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.08741987496614456, 'timestamp': '2025-09-30 22:25:49.345877', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:49.405578', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.172989621758461, 'timestamp': '2025-09-30 22:25:49.411491', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:49.481050', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.1304345577955246, 'timestamp': '2025-09-30 22:25:49.488148', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:49.567593', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.19936124980449677, 'timestamp': '2025-09-30 22:25:49.575845', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:49.654196', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.12569846212863922, 'timestamp': '2025-09-30 22:25:49.657565', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:49.725743', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.22803299129009247, 'timestamp': '2025-09-30 22:25:49.731257', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:49.793396', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.11383595317602158, 'timestamp': '2025-09-30 22:25:49.798979', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:49.860865', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.17013107240200043, 'timestamp': '2025-09-30 22:25:49.880035', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:49.940975', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.17109130322933197, 'timestamp': '2025-09-30 22:25:49.945947', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:50.018273', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.11337330937385559, 'timestamp': '2025-09-30 22:25:50.023964', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:50.097958', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.17063702642917633, 'timestamp': '2025-09-30 22:25:50.100862', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:50.159317', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.14853918552398682, 'timestamp': '2025-09-30 22:25:50.166739', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.226761', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.11476028710603714, 'timestamp': '2025-09-30 22:25:50.230515', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.291684', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.14628976583480835, 'timestamp': '2025-09-30 22:25:50.294470', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:50.365723', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.10431858152151108, 'timestamp': '2025-09-30 22:25:50.374330', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.432160', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.14083325862884521, 'timestamp': '2025-09-30 22:25:50.438381', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:50.495923', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.08190735429525375, 'timestamp': '2025-09-30 22:25:50.505511', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:50.581574', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.1313687562942505, 'timestamp': '2025-09-30 22:25:50.584975', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:50.643711', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.07855400443077087, 'timestamp': '2025-09-30 22:25:50.650408', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.707827', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.16371190547943115, 'timestamp': '2025-09-30 22:25:50.720701', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.777838', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.1446177065372467, 'timestamp': '2025-09-30 22:25:50.781085', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:50.848518', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.14427419006824493, 'timestamp': '2025-09-30 22:25:50.851469', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:50.910612', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.1857144683599472, 'timestamp': '2025-09-30 22:25:50.915701', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:50.975557', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.11261194199323654, 'timestamp': '2025-09-30 22:25:50.982006', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.044188', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.10960499197244644, 'timestamp': '2025-09-30 22:25:51.047132', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:51.107242', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.10197478532791138, 'timestamp': '2025-09-30 22:25:51.110258', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.170100', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.2893511950969696, 'timestamp': '2025-09-30 22:25:51.175390', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.236440', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.11624722927808762, 'timestamp': '2025-09-30 22:25:51.244302', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.304789', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.14041510224342346, 'timestamp': '2025-09-30 22:25:51.316877', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.381783', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.1858358234167099, 'timestamp': '2025-09-30 22:25:51.389386', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:51.447691', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.10576415061950684, 'timestamp': '2025-09-30 22:25:51.450601', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:51.525801', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.10962661355733871, 'timestamp': '2025-09-30 22:25:51.535594', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:51.592721', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.1692979484796524, 'timestamp': '2025-09-30 22:25:51.598857', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:51.659737', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.14171667397022247, 'timestamp': '2025-09-30 22:25:51.661922', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:51.720134', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.14693810045719147, 'timestamp': '2025-09-30 22:25:51.723628', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.781640', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.22703498601913452, 'timestamp': '2025-09-30 22:25:51.788265', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.845954', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.09582404047250748, 'timestamp': '2025-09-30 22:25:51.852037', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:51.913501', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.10257167369127274, 'timestamp': '2025-09-30 22:25:51.917884', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:51.974859', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.11423670500516891, 'timestamp': '2025-09-30 22:25:51.977393', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.036574', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.09386149793863297, 'timestamp': '2025-09-30 22:25:52.042806', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.099614', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.10068963468074799, 'timestamp': '2025-09-30 22:25:52.102148', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:52.158570', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.16340947151184082, 'timestamp': '2025-09-30 22:25:52.161028', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:52.218642', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.08610808849334717, 'timestamp': '2025-09-30 22:25:52.223009', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:52.284001', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.13296833634376526, 'timestamp': '2025-09-30 22:25:52.292206', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:52.351826', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.1191750168800354, 'timestamp': '2025-09-30 22:25:52.357250', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.419547', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.13539820909500122, 'timestamp': '2025-09-30 22:25:52.422207', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.488287', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.12401493638753891, 'timestamp': '2025-09-30 22:25:52.491220', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:52.560909', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.10941939055919647, 'timestamp': '2025-09-30 22:25:52.567525', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:52.628823', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.22406832873821259, 'timestamp': '2025-09-30 22:25:52.631649', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:52.690772', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.25893643498420715, 'timestamp': '2025-09-30 22:25:52.699187', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.766872', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.191720649600029, 'timestamp': '2025-09-30 22:25:52.775002', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:52.849121', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.1773834377527237, 'timestamp': '2025-09-30 22:25:52.856428', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:52.928779', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.16691608726978302, 'timestamp': '2025-09-30 22:25:52.935515', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:52.996853', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.11115159094333649, 'timestamp': '2025-09-30 22:25:52.999992', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.058427', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.1535118818283081, 'timestamp': '2025-09-30 22:25:53.060748', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:53.121155', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.19669315218925476, 'timestamp': '2025-09-30 22:25:53.128091', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:53.196722', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09197928011417389, 'timestamp': '2025-09-30 22:25:53.211176', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.268960', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.07861979305744171, 'timestamp': '2025-09-30 22:25:53.272968', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.331840', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.08434118330478668, 'timestamp': '2025-09-30 22:25:53.335164', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:53.392283', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.11378448456525803, 'timestamp': '2025-09-30 22:25:53.400054', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.458107', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.126304492354393, 'timestamp': '2025-09-30 22:25:53.462317', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.541448', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.14057756960391998, 'timestamp': '2025-09-30 22:25:53.544663', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.607410', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.13703292608261108, 'timestamp': '2025-09-30 22:25:53.614151', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.677554', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.06576406210660934, 'timestamp': '2025-09-30 22:25:53.691974', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:53.750286', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.15221311151981354, 'timestamp': '2025-09-30 22:25:53.753423', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.814162', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.08347956091165543, 'timestamp': '2025-09-30 22:25:53.816688', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:53.884286', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.19469845294952393, 'timestamp': '2025-09-30 22:25:53.892207', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:53.961126', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.1689099222421646, 'timestamp': '2025-09-30 22:25:53.967934', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:54.033064', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.07551272213459015, 'timestamp': '2025-09-30 22:25:54.039253', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:54.099695', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.08400003612041473, 'timestamp': '2025-09-30 22:25:54.108793', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:54.172479', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.13277482986450195, 'timestamp': '2025-09-30 22:25:54.186439', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:54.248675', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.15011540055274963, 'timestamp': '2025-09-30 22:25:54.260668', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:54.336655', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.09552695602178574, 'timestamp': '2025-09-30 22:25:54.339713', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:54.401468', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.12979906797409058, 'timestamp': '2025-09-30 22:25:54.407750', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:54.468474', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.09692446887493134, 'timestamp': '2025-09-30 22:25:54.470930', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:54.534757', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.11146444827318192, 'timestamp': '2025-09-30 22:25:54.545924', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:54.613528', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.09602609276771545, 'timestamp': '2025-09-30 22:25:54.618132', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:54.679203', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.0854811817407608, 'timestamp': '2025-09-30 22:25:54.688360', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:54.763973', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.1573159098625183, 'timestamp': '2025-09-30 22:25:54.774031', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:54.834292', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.14774614572525024, 'timestamp': '2025-09-30 22:25:54.845006', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:54.919676', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.07491053640842438, 'timestamp': '2025-09-30 22:25:54.926748', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:54.991168', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.1228063553571701, 'timestamp': '2025-09-30 22:25:54.996957', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:55.056310', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.13718685507774353, 'timestamp': '2025-09-30 22:25:55.062881', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:55.125543', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.17468403279781342, 'timestamp': '2025-09-30 22:25:55.135183', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:55.196794', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.10516691952943802, 'timestamp': '2025-09-30 22:25:55.202630', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:55.262134', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.10660913586616516, 'timestamp': '2025-09-30 22:25:55.265083', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:55.328860', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.20545171201229095, 'timestamp': '2025-09-30 22:25:55.335051', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:55.399040', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.11361253261566162, 'timestamp': '2025-09-30 22:25:55.406346', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:55.465569', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.10429630428552628, 'timestamp': '2025-09-30 22:25:55.470630', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:55.552702', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.08617322891950607, 'timestamp': '2025-09-30 22:25:55.557975', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:25:55.645686', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.11436410248279572, 'timestamp': '2025-09-30 22:25:55.650318', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:55.725082', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.16071951389312744, 'timestamp': '2025-09-30 22:25:55.737004', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:55.797774', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.09929731488227844, 'timestamp': '2025-09-30 22:25:55.800792', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:55.863289', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.07104543596506119, 'timestamp': '2025-09-30 22:25:55.869959', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:55.940588', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.08300324529409409, 'timestamp': '2025-09-30 22:25:55.945556', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:56.003164', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.1595279425382614, 'timestamp': '2025-09-30 22:25:56.009281', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:56.080152', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.23692584037780762, 'timestamp': '2025-09-30 22:25:56.083199', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:56.143062', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.09467032551765442, 'timestamp': '2025-09-30 22:25:56.148634', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:56.209949', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.22655493021011353, 'timestamp': '2025-09-30 22:25:56.212279', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:56.272258', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.05327506363391876, 'timestamp': '2025-09-30 22:25:56.281655', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:56.340866', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.0995219498872757, 'timestamp': '2025-09-30 22:25:56.344123', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:56.401173', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.19320634007453918, 'timestamp': '2025-09-30 22:25:56.404145', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:56.463276', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.21921482682228088, 'timestamp': '2025-09-30 22:25:56.470976', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:56.528272', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.0963887944817543, 'timestamp': '2025-09-30 22:25:56.538599', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:56.602655', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.20629170536994934, 'timestamp': '2025-09-30 22:25:56.610056', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:56.672467', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.10346357524394989, 'timestamp': '2025-09-30 22:25:56.681174', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:56.743698', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.1144113764166832, 'timestamp': '2025-09-30 22:25:56.749566', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:56.807956', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.07168855518102646, 'timestamp': '2025-09-30 22:25:56.819570', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:56.877402', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.13338400423526764, 'timestamp': '2025-09-30 22:25:56.884920', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:56.946857', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.12259169667959213, 'timestamp': '2025-09-30 22:25:56.951206', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:57.012073', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.07525981962680817, 'timestamp': '2025-09-30 22:25:57.014541', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:57.071308', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.20595791935920715, 'timestamp': '2025-09-30 22:25:57.078726', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:57.138676', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.04847375676035881, 'timestamp': '2025-09-30 22:25:57.146365', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:57.208456', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.114432692527771, 'timestamp': '2025-09-30 22:25:57.211545', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:57.269529', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.09078598022460938, 'timestamp': '2025-09-30 22:25:57.279084', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:57.341958', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.10096273571252823, 'timestamp': '2025-09-30 22:25:57.348293', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:57.423370', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.23801955580711365, 'timestamp': '2025-09-30 22:25:57.432001', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:57.494785', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.1590912938117981, 'timestamp': '2025-09-30 22:25:57.503440', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:25:57.576051', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.1526288390159607, 'timestamp': '2025-09-30 22:25:57.579460', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:57.644302', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.10664226114749908, 'timestamp': '2025-09-30 22:25:57.655873', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:57.719458', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.13107140362262726, 'timestamp': '2025-09-30 22:25:57.727901', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:57.791188', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.14689947664737701, 'timestamp': '2025-09-30 22:25:57.799976', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:25:57.864216', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.10726848244667053, 'timestamp': '2025-09-30 22:25:57.871988', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:57.934417', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.1599229872226715, 'timestamp': '2025-09-30 22:25:57.940615', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:58.017745', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.07660169899463654, 'timestamp': '2025-09-30 22:25:58.026239', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:58.084408', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.23180563747882843, 'timestamp': '2025-09-30 22:25:58.093238', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:58.158230', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.22583411633968353, 'timestamp': '2025-09-30 22:25:58.162037', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:58.224994', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.08042282611131668, 'timestamp': '2025-09-30 22:25:58.230964', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:58.291441', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.08251897990703583, 'timestamp': '2025-09-30 22:25:58.299559', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:58.359227', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.12535203993320465, 'timestamp': '2025-09-30 22:25:58.362641', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:58.444660', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.3413905203342438, 'timestamp': '2025-09-30 22:25:58.456785', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:58.522757', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.10127320140600204, 'timestamp': '2025-09-30 22:25:58.529672', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:58.599625', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.13860024511814117, 'timestamp': '2025-09-30 22:25:58.603589', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:58.668090', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.13692444562911987, 'timestamp': '2025-09-30 22:25:58.671976', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:58.735252', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.16409435868263245, 'timestamp': '2025-09-30 22:25:58.738636', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:58.803178', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.1495368778705597, 'timestamp': '2025-09-30 22:25:58.809059', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:58.872869', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.07506867498159409, 'timestamp': '2025-09-30 22:25:58.875592', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:58.932698', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.11853650957345963, 'timestamp': '2025-09-30 22:25:58.940025', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:59.004220', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.1320030242204666, 'timestamp': '2025-09-30 22:25:59.008040', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:59.073822', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.08954725414514542, 'timestamp': '2025-09-30 22:25:59.081405', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:59.148331', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.1983204036951065, 'timestamp': '2025-09-30 22:25:59.151336', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.214218', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.13705486059188843, 'timestamp': '2025-09-30 22:25:59.217030', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.285777', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.06245550885796547, 'timestamp': '2025-09-30 22:25:59.290153', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.348056', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.13200344145298004, 'timestamp': '2025-09-30 22:25:59.354655', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:25:59.430753', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.21742941439151764, 'timestamp': '2025-09-30 22:25:59.435395', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:59.504035', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.1524403989315033, 'timestamp': '2025-09-30 22:25:59.508911', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.573028', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.13361744582653046, 'timestamp': '2025-09-30 22:25:59.576850', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.643077', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.1510198563337326, 'timestamp': '2025-09-30 22:25:59.658621', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:25:59.717563', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.09927596151828766, 'timestamp': '2025-09-30 22:25:59.720188', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:25:59.781652', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.16580483317375183, 'timestamp': '2025-09-30 22:25:59.789151', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.852649', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.1568889021873474, 'timestamp': '2025-09-30 22:25:59.856629', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:25:59.916539', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.17224034667015076, 'timestamp': '2025-09-30 22:25:59.924639', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:25:59.983576', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.13242100179195404, 'timestamp': '2025-09-30 22:25:59.996495', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:00.078879', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.12039582431316376, 'timestamp': '2025-09-30 22:26:00.085538', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.144279', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.09492898732423782, 'timestamp': '2025-09-30 22:26:00.147406', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.206238', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.08456520736217499, 'timestamp': '2025-09-30 22:26:00.219140', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:00.276739', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.17281347513198853, 'timestamp': '2025-09-30 22:26:00.280985', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:00.342502', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.13576847314834595, 'timestamp': '2025-09-30 22:26:00.345879', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.406288', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.08547976613044739, 'timestamp': '2025-09-30 22:26:00.414033', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.473678', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.12829087674617767, 'timestamp': '2025-09-30 22:26:00.480116', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.536553', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.1047830805182457, 'timestamp': '2025-09-30 22:26:00.539427', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:00.610332', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.11982376128435135, 'timestamp': '2025-09-30 22:26:00.615565', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.674131', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.14195384085178375, 'timestamp': '2025-09-30 22:26:00.677177', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.743515', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.17884142696857452, 'timestamp': '2025-09-30 22:26:00.752368', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:00.810780', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.14486762881278992, 'timestamp': '2025-09-30 22:26:00.814336', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:00.873778', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.15602222084999084, 'timestamp': '2025-09-30 22:26:00.876482', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:00.935747', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.0744004175066948, 'timestamp': '2025-09-30 22:26:00.945319', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:01.009998', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.08798909932374954, 'timestamp': '2025-09-30 22:26:01.016984', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:01.073958', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.11091874539852142, 'timestamp': '2025-09-30 22:26:01.077779', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:01.146178', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.1348140686750412, 'timestamp': '2025-09-30 22:26:01.148624', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:01.206652', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.12188413739204407, 'timestamp': '2025-09-30 22:26:01.210175', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:01.268395', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.11812492460012436, 'timestamp': '2025-09-30 22:26:01.281519', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:01.338692', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.09589262306690216, 'timestamp': '2025-09-30 22:26:01.343290', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:01.407626', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.12882596254348755, 'timestamp': '2025-09-30 22:26:01.410689', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:01.477319', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.11838989704847336, 'timestamp': '2025-09-30 22:26:01.483740', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:01.547064', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.1735517531633377, 'timestamp': '2025-09-30 22:26:01.554412', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:01.612396', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.11334749311208725, 'timestamp': '2025-09-30 22:26:01.624098', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:01.686336', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.1049618199467659, 'timestamp': '2025-09-30 22:26:01.690509', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:01.753338', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.09935548156499863, 'timestamp': '2025-09-30 22:26:01.759208', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:01.833317', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.13206294178962708, 'timestamp': '2025-09-30 22:26:01.845804', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:01.925297', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.11332981288433075, 'timestamp': '2025-09-30 22:26:01.934692', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:01.998619', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.13851256668567657, 'timestamp': '2025-09-30 22:26:02.001396', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:02.060281', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.12220035493373871, 'timestamp': '2025-09-30 22:26:02.063736', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:02.126402', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.08332251012325287, 'timestamp': '2025-09-30 22:26:02.132820', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.198919', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.1624458134174347, 'timestamp': '2025-09-30 22:26:02.201893', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:02.259050', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.18538445234298706, 'timestamp': '2025-09-30 22:26:02.266832', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:02.331394', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.1819797307252884, 'timestamp': '2025-09-30 22:26:02.334524', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.399185', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.13753429055213928, 'timestamp': '2025-09-30 22:26:02.405686', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.466861', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.15287812054157257, 'timestamp': '2025-09-30 22:26:02.471202', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.533262', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.16380134224891663, 'timestamp': '2025-09-30 22:26:02.549235', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.625294', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.15082204341888428, 'timestamp': '2025-09-30 22:26:02.628121', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:02.710699', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.1445820927619934, 'timestamp': '2025-09-30 22:26:02.720312', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:02.777451', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.18583588302135468, 'timestamp': '2025-09-30 22:26:02.783194', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.841697', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.1358713060617447, 'timestamp': '2025-09-30 22:26:02.844987', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:02.903126', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.12129071354866028, 'timestamp': '2025-09-30 22:26:02.907283', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:02.965597', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.1717539280653, 'timestamp': '2025-09-30 22:26:02.974670', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:03.042756', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.11891476064920425, 'timestamp': '2025-09-30 22:26:03.045833', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.112081', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.05720677226781845, 'timestamp': '2025-09-30 22:26:03.115203', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.173671', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.09858998656272888, 'timestamp': '2025-09-30 22:26:03.176218', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:03.235534', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.10477565228939056, 'timestamp': '2025-09-30 22:26:03.244249', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.300731', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.1192416101694107, 'timestamp': '2025-09-30 22:26:03.304740', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:03.363943', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.17812219262123108, 'timestamp': '2025-09-30 22:26:03.368663', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:03.432843', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.11085347831249237, 'timestamp': '2025-09-30 22:26:03.436020', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.499042', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.1313449740409851, 'timestamp': '2025-09-30 22:26:03.505944', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.566153', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.11018761992454529, 'timestamp': '2025-09-30 22:26:03.568627', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.630142', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.1766728162765503, 'timestamp': '2025-09-30 22:26:03.641026', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:03.701472', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.06785748898983002, 'timestamp': '2025-09-30 22:26:03.713572', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.780985', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.18255123496055603, 'timestamp': '2025-09-30 22:26:03.787660', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.847047', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.23707914352416992, 'timestamp': '2025-09-30 22:26:03.852922', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:03.916726', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.2066527009010315, 'timestamp': '2025-09-30 22:26:03.924270', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:03.982917', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.1424756795167923, 'timestamp': '2025-09-30 22:26:03.988412', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:04.045798', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.10309524834156036, 'timestamp': '2025-09-30 22:26:04.052442', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:04.108683', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.14010591804981232, 'timestamp': '2025-09-30 22:26:04.111727', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:04.170964', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.06110730767250061, 'timestamp': '2025-09-30 22:26:04.175657', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:04.233827', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.10627135634422302, 'timestamp': '2025-09-30 22:26:04.240346', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:04.314315', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.1454944610595703, 'timestamp': '2025-09-30 22:26:04.323111', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:04.383174', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.16864611208438873, 'timestamp': '2025-09-30 22:26:04.386182', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:04.457832', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.1771346479654312, 'timestamp': '2025-09-30 22:26:04.463329', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:04.522514', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.12794549763202667, 'timestamp': '2025-09-30 22:26:04.525567', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:04.583357', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.16114939749240875, 'timestamp': '2025-09-30 22:26:04.592977', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:04.652009', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.12278562784194946, 'timestamp': '2025-09-30 22:26:04.662209', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:26:04.727971', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.15780790150165558, 'timestamp': '2025-09-30 22:26:04.730913', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:04.788300', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.07636970281600952, 'timestamp': '2025-09-30 22:26:04.792134', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:04.850395', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.11616592109203339, 'timestamp': '2025-09-30 22:26:04.865512', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:04.941932', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.10991443693637848, 'timestamp': '2025-09-30 22:26:04.945735', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:26:05.013727', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.08544979989528656, 'timestamp': '2025-09-30 22:26:05.026607', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:05.084439', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.07813848555088043, 'timestamp': '2025-09-30 22:26:05.087968', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.146715', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.15207847952842712, 'timestamp': '2025-09-30 22:26:05.152776', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:05.209800', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.10883302986621857, 'timestamp': '2025-09-30 22:26:05.216346', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.277690', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.06788913905620575, 'timestamp': '2025-09-30 22:26:05.284267', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:05.342279', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.16304659843444824, 'timestamp': '2025-09-30 22:26:05.345220', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.402754', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.1494675576686859, 'timestamp': '2025-09-30 22:26:05.409298', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.475428', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.08260434120893478, 'timestamp': '2025-09-30 22:26:05.478247', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.534373', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.1296738237142563, 'timestamp': '2025-09-30 22:26:05.536831', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:05.593891', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.11559037864208221, 'timestamp': '2025-09-30 22:26:05.596975', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:05.654168', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.12266488373279572, 'timestamp': '2025-09-30 22:26:05.661542', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:05.729391', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.21058696508407593, 'timestamp': '2025-09-30 22:26:05.732745', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:05.800204', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.13317467272281647, 'timestamp': '2025-09-30 22:26:05.804011', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:05.871580', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.1695835441350937, 'timestamp': '2025-09-30 22:26:05.875639', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:05.933642', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.07512607425451279, 'timestamp': '2025-09-30 22:26:05.939827', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:06.006638', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.09015282988548279, 'timestamp': '2025-09-30 22:26:06.011738', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:06.070567', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.056927215307950974, 'timestamp': '2025-09-30 22:26:06.074076', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:06.131071', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.15564242005348206, 'timestamp': '2025-09-30 22:26:06.133803', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:06.194251', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.1433488428592682, 'timestamp': '2025-09-30 22:26:06.200585', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:06.263952', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.17937639355659485, 'timestamp': '2025-09-30 22:26:06.266468', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:06.326044', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.1618930846452713, 'timestamp': '2025-09-30 22:26:06.334186', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:06.399540', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.06922072172164917, 'timestamp': '2025-09-30 22:26:06.402242', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:06.458469', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.1657414436340332, 'timestamp': '2025-09-30 22:26:06.465012', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:06.531829', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.07218644767999649, 'timestamp': '2025-09-30 22:26:06.538524', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:06.596246', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.17714256048202515, 'timestamp': '2025-09-30 22:26:06.599147', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:06.659314', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.18105246126651764, 'timestamp': '2025-09-30 22:26:06.662555', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:06.725363', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.20877039432525635, 'timestamp': '2025-09-30 22:26:06.731766', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:06.788513', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.07524760812520981, 'timestamp': '2025-09-30 22:26:06.791150', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:06.848606', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.12734094262123108, 'timestamp': '2025-09-30 22:26:06.851361', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:06.913549', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.12766693532466888, 'timestamp': '2025-09-30 22:26:06.916476', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:06.974418', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.13930854201316833, 'timestamp': '2025-09-30 22:26:06.980290', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:07.048105', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.13655880093574524, 'timestamp': '2025-09-30 22:26:07.056157', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.135822', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.16548943519592285, 'timestamp': '2025-09-30 22:26:07.142435', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:07.201403', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.17938512563705444, 'timestamp': '2025-09-30 22:26:07.203922', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:07.265476', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.1567641645669937, 'timestamp': '2025-09-30 22:26:07.272722', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:07.329686', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.15597155690193176, 'timestamp': '2025-09-30 22:26:07.333282', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.393553', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.1061398983001709, 'timestamp': '2025-09-30 22:26:07.396227', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.454143', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.10649628192186356, 'timestamp': '2025-09-30 22:26:07.456904', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:07.515407', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.1743353307247162, 'timestamp': '2025-09-30 22:26:07.527246', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.584270', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.16888199746608734, 'timestamp': '2025-09-30 22:26:07.588451', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:07.644978', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.11224089562892914, 'timestamp': '2025-09-30 22:26:07.655964', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.719130', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.1087019294500351, 'timestamp': '2025-09-30 22:26:07.730313', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.795871', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.10027524828910828, 'timestamp': '2025-09-30 22:26:07.810567', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:07.875781', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.08969699591398239, 'timestamp': '2025-09-30 22:26:07.879224', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:07.937597', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.055351004004478455, 'timestamp': '2025-09-30 22:26:07.940881', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:07.997390', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.07615210115909576, 'timestamp': '2025-09-30 22:26:08.010369', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:08.079882', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.10636147111654282, 'timestamp': '2025-09-30 22:26:08.085743', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:26:23.143406', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 12598.520396501714, 'timestamp': '2025-09-30 22:26:23.147939', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:23.206223', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.10749652236700058, 'timestamp': '2025-09-30 22:26:23.212133', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:23.274227', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.15349477529525757, 'timestamp': '2025-09-30 22:26:23.283225', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:23.349466', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.13873566687107086, 'timestamp': '2025-09-30 22:26:23.356763', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:23.421219', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.21724794805049896, 'timestamp': '2025-09-30 22:26:23.428041', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.489197', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.14747580885887146, 'timestamp': '2025-09-30 22:26:23.492334', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:23.553292', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.1479075849056244, 'timestamp': '2025-09-30 22:26:23.556331', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.622133', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.07228387892246246, 'timestamp': '2025-09-30 22:26:23.627485', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:23.686566', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.18571771681308746, 'timestamp': '2025-09-30 22:26:23.694830', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.755912', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.09494847804307938, 'timestamp': '2025-09-30 22:26:23.758528', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.819623', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.11457236111164093, 'timestamp': '2025-09-30 22:26:23.831970', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.899114', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.13764230906963348, 'timestamp': '2025-09-30 22:26:23.905525', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:23.965183', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.1622709035873413, 'timestamp': '2025-09-30 22:26:23.971867', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.028308', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.14999395608901978, 'timestamp': '2025-09-30 22:26:24.031175', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:24.097603', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.09300633519887924, 'timestamp': '2025-09-30 22:26:24.104537', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.169801', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.11372236162424088, 'timestamp': '2025-09-30 22:26:24.173522', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:24.235439', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.10157264024019241, 'timestamp': '2025-09-30 22:26:24.244605', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:24.306290', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.1699100285768509, 'timestamp': '2025-09-30 22:26:24.310080', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:24.367683', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.07871522009372711, 'timestamp': '2025-09-30 22:26:24.374634', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:24.435691', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.10339437425136566, 'timestamp': '2025-09-30 22:26:24.443749', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:24.508101', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.08398556709289551, 'timestamp': '2025-09-30 22:26:24.514557', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.575593', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.17825302481651306, 'timestamp': '2025-09-30 22:26:24.582354', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.645387', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.058855824172496796, 'timestamp': '2025-09-30 22:26:24.651493', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.716277', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.14943353831768036, 'timestamp': '2025-09-30 22:26:24.723777', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.785863', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.11950308829545975, 'timestamp': '2025-09-30 22:26:24.796412', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:24.859278', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.11689887195825577, 'timestamp': '2025-09-30 22:26:24.862098', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:24.919579', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.12223293632268906, 'timestamp': '2025-09-30 22:26:24.922396', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:24.981274', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.1227952316403389, 'timestamp': '2025-09-30 22:26:24.983741', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:25.040853', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.15997330844402313, 'timestamp': '2025-09-30 22:26:25.047666', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:25.104647', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.09161843359470367, 'timestamp': '2025-09-30 22:26:25.107274', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:25.167595', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.20892375707626343, 'timestamp': '2025-09-30 22:26:25.170728', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:25.227609', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.11415643244981766, 'timestamp': '2025-09-30 22:26:25.231946', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:25.288942', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.12622861564159393, 'timestamp': '2025-09-30 22:26:25.298896', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:25.356040', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.08918894082307816, 'timestamp': '2025-09-30 22:26:25.362621', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:25.421889', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.12346415966749191, 'timestamp': '2025-09-30 22:26:25.428856', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:25.488970', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.13739027082920074, 'timestamp': '2025-09-30 22:26:25.491376', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:25.548554', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.16158263385295868, 'timestamp': '2025-09-30 22:26:25.555946', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:25.616704', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.10188426822423935, 'timestamp': '2025-09-30 22:26:25.619694', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:25.681838', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.08501143008470535, 'timestamp': '2025-09-30 22:26:25.687790', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:25.744702', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.09394276142120361, 'timestamp': '2025-09-30 22:26:25.747979', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:25.808627', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.12959745526313782, 'timestamp': '2025-09-30 22:26:25.818357', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:25.882980', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.16850201785564423, 'timestamp': '2025-09-30 22:26:25.885458', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:25.943229', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.19932031631469727, 'timestamp': '2025-09-30 22:26:25.945986', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:26.004626', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.23319360613822937, 'timestamp': '2025-09-30 22:26:26.015558', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:26.079471', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.22089950740337372, 'timestamp': '2025-09-30 22:26:26.087515', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.145051', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.12959258258342743, 'timestamp': '2025-09-30 22:26:26.156888', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.219115', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.17655257880687714, 'timestamp': '2025-09-30 22:26:26.222246', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:26.287958', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.1459973156452179, 'timestamp': '2025-09-30 22:26:26.290777', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:26.353087', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.1732030212879181, 'timestamp': '2025-09-30 22:26:26.365370', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:26.421983', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.16801871359348297, 'timestamp': '2025-09-30 22:26:26.429918', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:26.496231', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.15536540746688843, 'timestamp': '2025-09-30 22:26:26.499311', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.567688', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.12002508342266083, 'timestamp': '2025-09-30 22:26:26.575777', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.636873', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.1652686446905136, 'timestamp': '2025-09-30 22:26:26.642725', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.698810', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.11285971850156784, 'timestamp': '2025-09-30 22:26:26.705362', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:26.766470', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.12932194769382477, 'timestamp': '2025-09-30 22:26:26.773984', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:26.836115', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.16062578558921814, 'timestamp': '2025-09-30 22:26:26.838890', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:26.897078', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.12628981471061707, 'timestamp': '2025-09-30 22:26:26.907952', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:26.965442', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.12347513437271118, 'timestamp': '2025-09-30 22:26:26.968005', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:27.028383', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.16070736944675446, 'timestamp': '2025-09-30 22:26:27.031807', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:27.091121', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.1257057636976242, 'timestamp': '2025-09-30 22:26:27.097849', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:27.157443', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.1147322803735733, 'timestamp': '2025-09-30 22:26:27.163810', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:27.220766', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.18824957311153412, 'timestamp': '2025-09-30 22:26:27.227190', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:27.293365', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.0981786698102951, 'timestamp': '2025-09-30 22:26:27.302586', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:27.365500', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.10256590694189072, 'timestamp': '2025-09-30 22:26:27.375914', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:27.441755', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.18126735091209412, 'timestamp': '2025-09-30 22:26:27.451795', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:27.514156', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.19612175226211548, 'timestamp': '2025-09-30 22:26:27.520478', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:27.584845', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.19077573716640472, 'timestamp': '2025-09-30 22:26:27.599858', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:27.664507', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.07434462755918503, 'timestamp': '2025-09-30 22:26:27.667756', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:27.726427', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.17400185763835907, 'timestamp': '2025-09-30 22:26:27.737915', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:27.794326', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.1660746932029724, 'timestamp': '2025-09-30 22:26:27.799884', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:27.858576', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.13398343324661255, 'timestamp': '2025-09-30 22:26:27.866642', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:27.933941', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.16698624193668365, 'timestamp': '2025-09-30 22:26:27.942152', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.001617', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.10431952029466629, 'timestamp': '2025-09-30 22:26:28.008092', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.065141', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.18612222373485565, 'timestamp': '2025-09-30 22:26:28.072151', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.138499', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.12877431511878967, 'timestamp': '2025-09-30 22:26:28.141536', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:28.204302', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.11554934084415436, 'timestamp': '2025-09-30 22:26:28.206849', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.277520', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.1721368283033371, 'timestamp': '2025-09-30 22:26:28.289038', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:28.345528', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.09625653922557831, 'timestamp': '2025-09-30 22:26:28.353357', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.410970', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.1562214344739914, 'timestamp': '2025-09-30 22:26:28.413823', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.472152', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.1859733760356903, 'timestamp': '2025-09-30 22:26:28.475339', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.532497', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.14911529421806335, 'timestamp': '2025-09-30 22:26:28.539829', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.597486', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.20061323046684265, 'timestamp': '2025-09-30 22:26:28.600154', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:28.665380', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.16303735971450806, 'timestamp': '2025-09-30 22:26:28.667898', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.725604', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.11833341419696808, 'timestamp': '2025-09-30 22:26:28.728443', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.792314', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.20749884843826294, 'timestamp': '2025-09-30 22:26:28.801101', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.858980', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.20104478299617767, 'timestamp': '2025-09-30 22:26:28.867878', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:28.925716', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.12090826779603958, 'timestamp': '2025-09-30 22:26:28.928531', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:28.991202', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.1443815976381302, 'timestamp': '2025-09-30 22:26:29.000771', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:29.061820', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.1430947184562683, 'timestamp': '2025-09-30 22:26:29.068747', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.135282', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.25826728343963623, 'timestamp': '2025-09-30 22:26:29.144684', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.202837', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.11707263439893723, 'timestamp': '2025-09-30 22:26:29.207059', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:29.270220', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.24495290219783783, 'timestamp': '2025-09-30 22:26:29.274325', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.332206', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.10100433975458145, 'timestamp': '2025-09-30 22:26:29.339997', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:29.402726', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.12783198058605194, 'timestamp': '2025-09-30 22:26:29.406262', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.464378', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.07926011830568314, 'timestamp': '2025-09-30 22:26:29.468028', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:29.526439', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.11464887112379074, 'timestamp': '2025-09-30 22:26:29.536740', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:29.603080', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.12643641233444214, 'timestamp': '2025-09-30 22:26:29.609424', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.666456', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.1344345211982727, 'timestamp': '2025-09-30 22:26:29.678643', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.745869', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.11244773864746094, 'timestamp': '2025-09-30 22:26:29.749356', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:29.810839', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.13043096661567688, 'timestamp': '2025-09-30 22:26:29.821161', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:29.878708', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.15671904385089874, 'timestamp': '2025-09-30 22:26:29.890997', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:29.949299', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.0676865354180336, 'timestamp': '2025-09-30 22:26:29.958418', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:30.016587', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.099026620388031, 'timestamp': '2025-09-30 22:26:30.020745', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:30.078753', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.09707450121641159, 'timestamp': '2025-09-30 22:26:30.083160', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:30.149068', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.09208277612924576, 'timestamp': '2025-09-30 22:26:30.156447', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:30.227857', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.07829206436872482, 'timestamp': '2025-09-30 22:26:30.230646', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:30.295792', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.10040432959794998, 'timestamp': '2025-09-30 22:26:30.298667', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:30.371288', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.2584115266799927, 'timestamp': '2025-09-30 22:26:30.375500', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:30.436223', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.12112937867641449, 'timestamp': '2025-09-30 22:26:30.442204', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:30.502173', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.10286924988031387, 'timestamp': '2025-09-30 22:26:30.505229', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:30.566610', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.11150496453046799, 'timestamp': '2025-09-30 22:26:30.569624', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:30.628301', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.18610799312591553, 'timestamp': '2025-09-30 22:26:30.631048', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:30.692123', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.09516041725873947, 'timestamp': '2025-09-30 22:26:30.703538', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:30.760953', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.10881469398736954, 'timestamp': '2025-09-30 22:26:30.766635', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:30.827477', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.12636186182498932, 'timestamp': '2025-09-30 22:26:30.834554', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:30.898962', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.08876368403434753, 'timestamp': '2025-09-30 22:26:30.906507', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:30.964211', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.10505275428295135, 'timestamp': '2025-09-30 22:26:30.976697', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:31.037920', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.12464310973882675, 'timestamp': '2025-09-30 22:26:31.044755', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.102802', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.11242511868476868, 'timestamp': '2025-09-30 22:26:31.105939', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:31.168808', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.1728924810886383, 'timestamp': '2025-09-30 22:26:31.176356', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.233443', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.10744158178567886, 'timestamp': '2025-09-30 22:26:31.240367', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.299518', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.1251991093158722, 'timestamp': '2025-09-30 22:26:31.305911', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:31.366416', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.24156664311885834, 'timestamp': '2025-09-30 22:26:31.369748', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:31.428142', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.1310327649116516, 'timestamp': '2025-09-30 22:26:31.430900', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:31.491615', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.1974288821220398, 'timestamp': '2025-09-30 22:26:31.503593', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.563913', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.09733704477548599, 'timestamp': '2025-09-30 22:26:31.567088', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:31.625631', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.0929405465722084, 'timestamp': '2025-09-30 22:26:31.631423', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.689843', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.17419025301933289, 'timestamp': '2025-09-30 22:26:31.697862', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:31.762003', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.14576350152492523, 'timestamp': '2025-09-30 22:26:31.773104', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:31.835316', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.21360020339488983, 'timestamp': '2025-09-30 22:26:31.838239', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:31.896558', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.10588442534208298, 'timestamp': '2025-09-30 22:26:31.904251', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:31.967029', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.15843437612056732, 'timestamp': '2025-09-30 22:26:31.970567', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:32.029738', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.09019894897937775, 'timestamp': '2025-09-30 22:26:32.037676', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:32.105006', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.07741051912307739, 'timestamp': '2025-09-30 22:26:32.108851', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:32.173611', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.13557276129722595, 'timestamp': '2025-09-30 22:26:32.187105', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:32.244305', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.20021075010299683, 'timestamp': '2025-09-30 22:26:32.247747', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:32.306274', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.1442202776670456, 'timestamp': '2025-09-30 22:26:32.313659', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:32.371557', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.18575820326805115, 'timestamp': '2025-09-30 22:26:32.380860', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:32.447989', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.1149531900882721, 'timestamp': '2025-09-30 22:26:32.456358', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:32.520833', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.0954255685210228, 'timestamp': '2025-09-30 22:26:32.524253', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:32.581777', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.11012227088212967, 'timestamp': '2025-09-30 22:26:32.596420', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:32.654161', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.06971557438373566, 'timestamp': '2025-09-30 22:26:32.657781', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:32.726610', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.17997263371944427, 'timestamp': '2025-09-30 22:26:32.729967', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:32.787591', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.14605095982551575, 'timestamp': '2025-09-30 22:26:32.791527', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:32.848341', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.2216159552335739, 'timestamp': '2025-09-30 22:26:32.862533', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:32.937190', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.09416702389717102, 'timestamp': '2025-09-30 22:26:32.941029', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:33.009519', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.09999795258045197, 'timestamp': '2025-09-30 22:26:33.014336', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:33.072776', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.16682159900665283, 'timestamp': '2025-09-30 22:26:33.075804', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:33.139703', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.18262769281864166, 'timestamp': '2025-09-30 22:26:33.155364', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:33.222456', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.10916813462972641, 'timestamp': '2025-09-30 22:26:33.225841', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:33.292352', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.2111940234899521, 'timestamp': '2025-09-30 22:26:33.302812', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:33.369693', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.0712546855211258, 'timestamp': '2025-09-30 22:26:33.380569', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:33.448404', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.12839631736278534, 'timestamp': '2025-09-30 22:26:33.464793', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:33.539704', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.23558886349201202, 'timestamp': '2025-09-30 22:26:33.543335', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:33.612580', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.1615908145904541, 'timestamp': '2025-09-30 22:26:33.615565', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:33.682033', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.1429624706506729, 'timestamp': '2025-09-30 22:26:33.684747', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:33.749189', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.0686870813369751, 'timestamp': '2025-09-30 22:26:33.756565', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:33.823976', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.07497242093086243, 'timestamp': '2025-09-30 22:26:33.836547', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:33.894734', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.15518568456172943, 'timestamp': '2025-09-30 22:26:33.898732', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:33.957974', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.10105366259813309, 'timestamp': '2025-09-30 22:26:33.961912', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.028077', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.24035927653312683, 'timestamp': '2025-09-30 22:26:34.035168', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.102524', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.1374177485704422, 'timestamp': '2025-09-30 22:26:34.113563', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.170177', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.13485561311244965, 'timestamp': '2025-09-30 22:26:34.173307', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.231229', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.16934619843959808, 'timestamp': '2025-09-30 22:26:34.234549', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:34.293960', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.11361970007419586, 'timestamp': '2025-09-30 22:26:34.300182', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:34.370592', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.17131741344928741, 'timestamp': '2025-09-30 22:26:34.373851', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:34.432364', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.09188374876976013, 'timestamp': '2025-09-30 22:26:34.442115', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.507593', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.10191375017166138, 'timestamp': '2025-09-30 22:26:34.520241', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.587884', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.12207204848527908, 'timestamp': '2025-09-30 22:26:34.596514', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.664563', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.11488527059555054, 'timestamp': '2025-09-30 22:26:34.667274', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.724574', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.12256024032831192, 'timestamp': '2025-09-30 22:26:34.732126', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:34.792169', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.05447300150990486, 'timestamp': '2025-09-30 22:26:34.795130', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:34.857199', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.09880103915929794, 'timestamp': '2025-09-30 22:26:34.872122', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:34.929571', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.24240367114543915, 'timestamp': '2025-09-30 22:26:34.932356', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:34.989966', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.10728668421506882, 'timestamp': '2025-09-30 22:26:34.994026', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:35.051654', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.06831564754247665, 'timestamp': '2025-09-30 22:26:35.059216', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:35.125070', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.18191665410995483, 'timestamp': '2025-09-30 22:26:35.131233', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:35.188382', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.10740116238594055, 'timestamp': '2025-09-30 22:26:35.196859', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:35.253915', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.19917994737625122, 'timestamp': '2025-09-30 22:26:35.256978', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:35.317521', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.1939680129289627, 'timestamp': '2025-09-30 22:26:35.320904', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:35.377477', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.10976117104291916, 'timestamp': '2025-09-30 22:26:35.388908', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-09-30 22:26:35.796973', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:35.855468', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.18437626957893372, 'timestamp': '2025-09-30 22:26:35.858522', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:35.916017', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.11558083444833755, 'timestamp': '2025-09-30 22:26:35.920642', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:35.977484', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.15151704847812653, 'timestamp': '2025-09-30 22:26:35.982669', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:36.040805', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.05725546181201935, 'timestamp': '2025-09-30 22:26:36.047438', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:36.110260', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.1551976054906845, 'timestamp': '2025-09-30 22:26:36.118912', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:36.197417', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.05040383338928223, 'timestamp': '2025-09-30 22:26:36.199761', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:36.260702', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.1370110958814621, 'timestamp': '2025-09-30 22:26:36.265915', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:36.325434', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.1730377972126007, 'timestamp': '2025-09-30 22:26:36.331920', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:36.388925', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.07664941251277924, 'timestamp': '2025-09-30 22:26:36.392215', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:36.448956', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.15605482459068298, 'timestamp': '2025-09-30 22:26:36.451624', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:36.508869', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.15661722421646118, 'timestamp': '2025-09-30 22:26:36.514351', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:36.570949', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.12331468611955643, 'timestamp': '2025-09-30 22:26:36.577320', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:36.644799', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.21892736852169037, 'timestamp': '2025-09-30 22:26:36.648117', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:36.705182', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.09389910101890564, 'timestamp': '2025-09-30 22:26:36.708734', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:36.767931', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.07379885017871857, 'timestamp': '2025-09-30 22:26:36.772072', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:36.828800', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.06970104575157166, 'timestamp': '2025-09-30 22:26:36.834900', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:36.903552', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.09374131262302399, 'timestamp': '2025-09-30 22:26:36.906471', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:36.968185', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.190412700176239, 'timestamp': '2025-09-30 22:26:36.980014', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:37.052451', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.10347426682710648, 'timestamp': '2025-09-30 22:26:37.055297', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:37.117963', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.10140571743249893, 'timestamp': '2025-09-30 22:26:37.125405', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:37.191069', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.11502236872911453, 'timestamp': '2025-09-30 22:26:37.194207', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:37.251862', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.10753408074378967, 'timestamp': '2025-09-30 22:26:37.266438', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:37.336906', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.10875808447599411, 'timestamp': '2025-09-30 22:26:37.345644', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:37.404332', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.16798454523086548, 'timestamp': '2025-09-30 22:26:37.414800', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:37.477094', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.13782383501529694, 'timestamp': '2025-09-30 22:26:37.485534', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:37.542855', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.0988360196352005, 'timestamp': '2025-09-30 22:26:37.549753', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:37.607731', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.14477363228797913, 'timestamp': '2025-09-30 22:26:37.611380', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:37.668309', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.12390065938234329, 'timestamp': '2025-09-30 22:26:37.682004', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:37.746070', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.14496570825576782, 'timestamp': '2025-09-30 22:26:37.749823', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:37.821146', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.10707535594701767, 'timestamp': '2025-09-30 22:26:37.824417', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:37.881276', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.21199549734592438, 'timestamp': '2025-09-30 22:26:37.884437', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:37.941766', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.17123094201087952, 'timestamp': '2025-09-30 22:26:37.947894', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.012634', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.12728281319141388, 'timestamp': '2025-09-30 22:26:38.015744', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:38.073279', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.10161732882261276, 'timestamp': '2025-09-30 22:26:38.077220', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.142981', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.09424502402544022, 'timestamp': '2025-09-30 22:26:38.146681', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:38.216694', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.10285785794258118, 'timestamp': '2025-09-30 22:26:38.227320', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:38.290528', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.19983331859111786, 'timestamp': '2025-09-30 22:26:38.300525', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.365558', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.17882420122623444, 'timestamp': '2025-09-30 22:26:38.374125', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:38.434519', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.12112081050872803, 'timestamp': '2025-09-30 22:26:38.437661', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:38.494500', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.12593409419059753, 'timestamp': '2025-09-30 22:26:38.500886', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:38.564059', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.10388652235269547, 'timestamp': '2025-09-30 22:26:38.572918', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:38.635680', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.11961517482995987, 'timestamp': '2025-09-30 22:26:38.645125', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.702568', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.13429801166057587, 'timestamp': '2025-09-30 22:26:38.709846', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.772248', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.10706932842731476, 'timestamp': '2025-09-30 22:26:38.778646', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:38.835067', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.0923335999250412, 'timestamp': '2025-09-30 22:26:38.838111', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:38.900074', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.08406976610422134, 'timestamp': '2025-09-30 22:26:38.903405', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:38.962191', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.17428921163082123, 'timestamp': '2025-09-30 22:26:38.965334', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:39.022478', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.06537655740976334, 'timestamp': '2025-09-30 22:26:39.029103', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.094380', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.13348336517810822, 'timestamp': '2025-09-30 22:26:39.097000', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.167958', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.046555306762456894, 'timestamp': '2025-09-30 22:26:39.170421', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.233433', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.1660183072090149, 'timestamp': '2025-09-30 22:26:39.236619', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.302052', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.15916474163532257, 'timestamp': '2025-09-30 22:26:39.310134', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:39.380441', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.20053371787071228, 'timestamp': '2025-09-30 22:26:39.384268', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.441504', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.1781512349843979, 'timestamp': '2025-09-30 22:26:39.450331', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.514973', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.09089919179677963, 'timestamp': '2025-09-30 22:26:39.518317', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:39.590543', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.08789284527301788, 'timestamp': '2025-09-30 22:26:39.597138', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.658914', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.14127087593078613, 'timestamp': '2025-09-30 22:26:39.665682', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:39.728927', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.17240425944328308, 'timestamp': '2025-09-30 22:26:39.737906', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:39.797471', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.13318435847759247, 'timestamp': '2025-09-30 22:26:39.799956', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:39.864206', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.12515735626220703, 'timestamp': '2025-09-30 22:26:39.871325', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:39.933784', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.13449503481388092, 'timestamp': '2025-09-30 22:26:39.936526', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:39.997257', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.17189157009124756, 'timestamp': '2025-09-30 22:26:40.003978', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:40.061470', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.13431650400161743, 'timestamp': '2025-09-30 22:26:40.066559', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:40.130831', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.13381163775920868, 'timestamp': '2025-09-30 22:26:40.137166', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:40.194696', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.16888725757598877, 'timestamp': '2025-09-30 22:26:40.197293', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:40.262545', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.1741822212934494, 'timestamp': '2025-09-30 22:26:40.268449', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:40.329644', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.13791826367378235, 'timestamp': '2025-09-30 22:26:40.333383', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:40.403809', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.13733859360218048, 'timestamp': '2025-09-30 22:26:40.409853', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:40.466386', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.1299414485692978, 'timestamp': '2025-09-30 22:26:40.468794', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:40.526485', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.07170101255178452, 'timestamp': '2025-09-30 22:26:40.537550', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:40.599691', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.17814458906650543, 'timestamp': '2025-09-30 22:26:40.609450', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:40.683710', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.13494545221328735, 'timestamp': '2025-09-30 22:26:40.689962', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:40.748158', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.10921144485473633, 'timestamp': '2025-09-30 22:26:40.752811', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:40.815334', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.15667776763439178, 'timestamp': '2025-09-30 22:26:40.820221', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:40.880241', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.21892590820789337, 'timestamp': '2025-09-30 22:26:40.882816', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:40.951311', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.16142547130584717, 'timestamp': '2025-09-30 22:26:40.960047', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:41.019721', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.13470792770385742, 'timestamp': '2025-09-30 22:26:41.028241', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:41.087829', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.10531418770551682, 'timestamp': '2025-09-30 22:26:41.090866', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.147979', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.12121202051639557, 'timestamp': '2025-09-30 22:26:41.152904', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.212504', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.09230044484138489, 'timestamp': '2025-09-30 22:26:41.222682', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.300216', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.13359017670154572, 'timestamp': '2025-09-30 22:26:41.306819', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:41.374084', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.14933986961841583, 'timestamp': '2025-09-30 22:26:41.381594', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.443628', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.12462665885686874, 'timestamp': '2025-09-30 22:26:41.446203', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:41.507663', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.1046271026134491, 'timestamp': '2025-09-30 22:26:41.513945', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:41.572456', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.1271047443151474, 'timestamp': '2025-09-30 22:26:41.575771', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:41.635601', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.15004588663578033, 'timestamp': '2025-09-30 22:26:41.639010', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:41.703602', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.09660498797893524, 'timestamp': '2025-09-30 22:26:41.706569', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.765713', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.22797755897045135, 'timestamp': '2025-09-30 22:26:41.771897', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:41.832559', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.13637398183345795, 'timestamp': '2025-09-30 22:26:41.839910', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:41.902748', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.07683172076940536, 'timestamp': '2025-09-30 22:26:41.911389', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:41.975869', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.1076701283454895, 'timestamp': '2025-09-30 22:26:41.982314', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.046725', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.18727053701877594, 'timestamp': '2025-09-30 22:26:42.055867', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:42.117683', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.0737248882651329, 'timestamp': '2025-09-30 22:26:42.120559', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.179464', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.07333974540233612, 'timestamp': '2025-09-30 22:26:42.187840', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:42.263498', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.26401183009147644, 'timestamp': '2025-09-30 22:26:42.266900', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:42.325321', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.07463875412940979, 'timestamp': '2025-09-30 22:26:42.332618', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.389407', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12218424677848816, 'timestamp': '2025-09-30 22:26:42.392898', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:42.451254', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.18973499536514282, 'timestamp': '2025-09-30 22:26:42.453511', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:42.530412', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.07828398793935776, 'timestamp': '2025-09-30 22:26:42.533231', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.595817', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.09809383749961853, 'timestamp': '2025-09-30 22:26:42.607443', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:42.670528', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.153047576546669, 'timestamp': '2025-09-30 22:26:42.673112', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.736351', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.094785675406456, 'timestamp': '2025-09-30 22:26:42.739595', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:42.797152', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.13826094567775726, 'timestamp': '2025-09-30 22:26:42.805484', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:42.873639', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.1278451532125473, 'timestamp': '2025-09-30 22:26:42.880086', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:42.936872', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.1233154758810997, 'timestamp': '2025-09-30 22:26:42.939533', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:42.998588', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.11970465630292892, 'timestamp': '2025-09-30 22:26:43.002346', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:43.059696', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.10656020790338516, 'timestamp': '2025-09-30 22:26:43.062368', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:43.124538', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.09885065257549286, 'timestamp': '2025-09-30 22:26:43.136365', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:43.194689', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.1090109720826149, 'timestamp': '2025-09-30 22:26:43.202025', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:43.267207', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.09309053421020508, 'timestamp': '2025-09-30 22:26:43.283842', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:43.354067', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.17887422442436218, 'timestamp': '2025-09-30 22:26:43.357301', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:43.415677', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.12331502139568329, 'timestamp': '2025-09-30 22:26:43.422678', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:43.486992', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.07601451873779297, 'timestamp': '2025-09-30 22:26:43.490078', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:43.559717', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.09956303238868713, 'timestamp': '2025-09-30 22:26:43.568273', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:43.631507', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.12141204625368118, 'timestamp': '2025-09-30 22:26:43.639308', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:43.706073', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.17472824454307556, 'timestamp': '2025-09-30 22:26:43.718745', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:43.795081', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.15803366899490356, 'timestamp': '2025-09-30 22:26:43.797728', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:43.856860', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.04054928198456764, 'timestamp': '2025-09-30 22:26:43.863577', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:43.933922', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.19945134222507477, 'timestamp': '2025-09-30 22:26:43.937646', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:43.995655', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.1380569189786911, 'timestamp': '2025-09-30 22:26:44.009637', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.075825', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.11229167133569717, 'timestamp': '2025-09-30 22:26:44.079817', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:44.145317', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.20983943343162537, 'timestamp': '2025-09-30 22:26:44.151763', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:44.214993', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.17496486008167267, 'timestamp': '2025-09-30 22:26:44.222180', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.283744', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.159082293510437, 'timestamp': '2025-09-30 22:26:44.292781', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.350646', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.13603734970092773, 'timestamp': '2025-09-30 22:26:44.355983', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:44.415445', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.08933756500482559, 'timestamp': '2025-09-30 22:26:44.423146', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:44.482759', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.1336302012205124, 'timestamp': '2025-09-30 22:26:44.487722', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.548907', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.2502131462097168, 'timestamp': '2025-09-30 22:26:44.556860', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.625859', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.16060319542884827, 'timestamp': '2025-09-30 22:26:44.631751', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:44.690442', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.1246885359287262, 'timestamp': '2025-09-30 22:26:44.693511', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:44.752136', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.1842356026172638, 'timestamp': '2025-09-30 22:26:44.754974', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:44.816201', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.2091226875782013, 'timestamp': '2025-09-30 22:26:44.824812', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:44.883246', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.175259530544281, 'timestamp': '2025-09-30 22:26:44.888418', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:44.949699', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.15175284445285797, 'timestamp': '2025-09-30 22:26:44.955907', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:45.016243', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.19267018139362335, 'timestamp': '2025-09-30 22:26:45.019175', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:45.076193', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.0981302335858345, 'timestamp': '2025-09-30 22:26:45.083647', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:45.143395', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.10655215382575989, 'timestamp': '2025-09-30 22:26:45.151205', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.210207', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.0975719466805458, 'timestamp': '2025-09-30 22:26:45.219659', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.287870', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.09409023821353912, 'timestamp': '2025-09-30 22:26:45.291143', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:45.349393', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.1563708335161209, 'timestamp': '2025-09-30 22:26:45.361048', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:45.419206', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.11985879391431808, 'timestamp': '2025-09-30 22:26:45.426257', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.488346', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.24889017641544342, 'timestamp': '2025-09-30 22:26:45.491495', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.549297', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.11385338753461838, 'timestamp': '2025-09-30 22:26:45.553064', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.613742', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.16932202875614166, 'timestamp': '2025-09-30 22:26:45.619512', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.676897', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.20325720310211182, 'timestamp': '2025-09-30 22:26:45.679372', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:45.737479', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.15472078323364258, 'timestamp': '2025-09-30 22:26:45.740395', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.797759', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.1249181255698204, 'timestamp': '2025-09-30 22:26:45.804242', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:45.862982', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.2135361284017563, 'timestamp': '2025-09-30 22:26:45.869030', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:45.926063', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.09276790916919708, 'timestamp': '2025-09-30 22:26:45.928907', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:45.986176', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.13284878432750702, 'timestamp': '2025-09-30 22:26:45.989318', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:46.047373', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.08382759243249893, 'timestamp': '2025-09-30 22:26:46.050416', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.108710', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.10928928852081299, 'timestamp': '2025-09-30 22:26:46.123921', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.181855', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.11481387168169022, 'timestamp': '2025-09-30 22:26:46.185296', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.244270', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.14577509462833405, 'timestamp': '2025-09-30 22:26:46.246752', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.319076', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.15076076984405518, 'timestamp': '2025-09-30 22:26:46.321734', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:46.387992', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.14140437543392181, 'timestamp': '2025-09-30 22:26:46.394139', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:46.450416', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.10660822689533234, 'timestamp': '2025-09-30 22:26:46.454411', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:46.514619', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.052141040563583374, 'timestamp': '2025-09-30 22:26:46.517576', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:46.579344', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.13513189554214478, 'timestamp': '2025-09-30 22:26:46.589280', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:46.658016', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.15178050100803375, 'timestamp': '2025-09-30 22:26:46.666780', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.726951', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.08468499034643173, 'timestamp': '2025-09-30 22:26:46.730457', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:26:46.797036', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.1554495096206665, 'timestamp': '2025-09-30 22:26:46.806935', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:46.867252', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.11513753235340118, 'timestamp': '2025-09-30 22:26:46.870744', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:46.929167', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.07057162374258041, 'timestamp': '2025-09-30 22:26:46.937217', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:46.996407', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.08994906395673752, 'timestamp': '2025-09-30 22:26:47.000127', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:47.059360', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.21620503067970276, 'timestamp': '2025-09-30 22:26:47.062861', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:47.144982', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.14921748638153076, 'timestamp': '2025-09-30 22:26:47.152423', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:47.214814', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.1356523483991623, 'timestamp': '2025-09-30 22:26:47.226426', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:47.283978', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.08980152010917664, 'timestamp': '2025-09-30 22:26:47.286966', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:47.345120', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.17078833281993866, 'timestamp': '2025-09-30 22:26:47.347746', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:47.404786', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.2528531849384308, 'timestamp': '2025-09-30 22:26:47.408499', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:47.465877', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.28703105449676514, 'timestamp': '2025-09-30 22:26:47.472622', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:47.528306', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.05811288580298424, 'timestamp': '2025-09-30 22:26:47.532496', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:47.592987', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.16414545476436615, 'timestamp': '2025-09-30 22:26:47.596270', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:47.660142', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.16729727387428284, 'timestamp': '2025-09-30 22:26:47.662987', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:47.725087', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.16685812175273895, 'timestamp': '2025-09-30 22:26:47.739238', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:47.803294', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.07352912425994873, 'timestamp': '2025-09-30 22:26:47.807710', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:47.867235', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.23222516477108002, 'timestamp': '2025-09-30 22:26:47.869559', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:47.937544', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.13726235926151276, 'timestamp': '2025-09-30 22:26:47.948366', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:26:48.014394', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.16446354985237122, 'timestamp': '2025-09-30 22:26:48.020694', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:48.087351', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.09552090615034103, 'timestamp': '2025-09-30 22:26:48.095916', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.160461', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.09249182045459747, 'timestamp': '2025-09-30 22:26:48.165042', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.222612', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.18712656199932098, 'timestamp': '2025-09-30 22:26:48.225044', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:48.295713', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.16479840874671936, 'timestamp': '2025-09-30 22:26:48.306886', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:48.366655', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.13698774576187134, 'timestamp': '2025-09-30 22:26:48.368976', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.428149', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.16962186992168427, 'timestamp': '2025-09-30 22:26:48.433381', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.489862', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.15657103061676025, 'timestamp': '2025-09-30 22:26:48.493279', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:48.551359', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.11568757891654968, 'timestamp': '2025-09-30 22:26:48.557927', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:48.615604', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.14557595551013947, 'timestamp': '2025-09-30 22:26:48.618462', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:48.675385', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.1293427050113678, 'timestamp': '2025-09-30 22:26:48.678146', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.736082', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.08250286430120468, 'timestamp': '2025-09-30 22:26:48.738998', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:48.798513', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.13779014348983765, 'timestamp': '2025-09-30 22:26:48.805498', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:48.866079', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.07608542591333389, 'timestamp': '2025-09-30 22:26:48.869218', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:48.926372', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.1649741530418396, 'timestamp': '2025-09-30 22:26:48.929383', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:48.988140', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.1985507309436798, 'timestamp': '2025-09-30 22:26:48.991710', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:49.054900', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.1190696507692337, 'timestamp': '2025-09-30 22:26:49.065700', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:49.129763', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.18600429594516754, 'timestamp': '2025-09-30 22:26:49.141262', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:49.212318', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.13388167321681976, 'timestamp': '2025-09-30 22:26:49.220085', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:49.282609', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.09908971190452576, 'timestamp': '2025-09-30 22:26:49.285890', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:49.345463', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.08534201979637146, 'timestamp': '2025-09-30 22:26:49.358131', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:49.414462', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.1754305362701416, 'timestamp': '2025-09-30 22:26:49.417587', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:49.480079', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.14485026895999908, 'timestamp': '2025-09-30 22:26:49.486015', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:49.544001', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.20116205513477325, 'timestamp': '2025-09-30 22:26:49.547684', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:49.608668', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.04814933240413666, 'timestamp': '2025-09-30 22:26:49.614775', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:49.671962', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.2151487171649933, 'timestamp': '2025-09-30 22:26:49.674435', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:49.732135', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.13962741196155548, 'timestamp': '2025-09-30 22:26:49.734887', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:49.803873', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.1358547955751419, 'timestamp': '2025-09-30 22:26:49.807308', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:49.875615', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.09735961258411407, 'timestamp': '2025-09-30 22:26:49.881478', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:49.940771', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.0857527107000351, 'timestamp': '2025-09-30 22:26:49.943181', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:50.002361', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.147886261343956, 'timestamp': '2025-09-30 22:26:50.006376', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:50.079553', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.09987135231494904, 'timestamp': '2025-09-30 22:26:50.082865', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:50.142393', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.15741665661334991, 'timestamp': '2025-09-30 22:26:50.149524', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:50.206743', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.11050524562597275, 'timestamp': '2025-09-30 22:26:50.210361', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:50.280401', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.11434978991746902, 'timestamp': '2025-09-30 22:26:50.283418', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:26:50.342973', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.14714452624320984, 'timestamp': '2025-09-30 22:26:50.347442', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:50.418570', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.11135523021221161, 'timestamp': '2025-09-30 22:26:50.426046', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:50.483258', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.16851842403411865, 'timestamp': '2025-09-30 22:26:50.485661', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:50.549120', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.07515119016170502, 'timestamp': '2025-09-30 22:26:50.556303', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:50.617372', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.20509999990463257, 'timestamp': '2025-09-30 22:26:50.619886', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:50.689259', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.12832188606262207, 'timestamp': '2025-09-30 22:26:50.695782', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:50.753978', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.11940336972475052, 'timestamp': '2025-09-30 22:26:50.762150', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:50.853199', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.17144036293029785, 'timestamp': '2025-09-30 22:26:50.860572', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:50.934077', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.09428783506155014, 'timestamp': '2025-09-30 22:26:50.941159', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.013980', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.10777418315410614, 'timestamp': '2025-09-30 22:26:51.024793', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:51.082133', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.10335179418325424, 'timestamp': '2025-09-30 22:26:51.087150', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.147759', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.1487707644701004, 'timestamp': '2025-09-30 22:26:51.151481', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:51.211587', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.16194559633731842, 'timestamp': '2025-09-30 22:26:51.215670', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.275948', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.06923879683017731, 'timestamp': '2025-09-30 22:26:51.284612', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.357688', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.20805466175079346, 'timestamp': '2025-09-30 22:26:51.365042', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.428593', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.09540358930826187, 'timestamp': '2025-09-30 22:26:51.433754', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.493139', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.14821882545948029, 'timestamp': '2025-09-30 22:26:51.496121', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.559769', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.10861369967460632, 'timestamp': '2025-09-30 22:26:51.570962', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:51.627061', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.15469977259635925, 'timestamp': '2025-09-30 22:26:51.630407', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:51.688730', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.08385098725557327, 'timestamp': '2025-09-30 22:26:51.692446', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.749514', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.15334127843379974, 'timestamp': '2025-09-30 22:26:51.752396', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.810105', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.17769259214401245, 'timestamp': '2025-09-30 22:26:51.816290', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.874068', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.1612665355205536, 'timestamp': '2025-09-30 22:26:51.880337', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:51.941326', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.15880465507507324, 'timestamp': '2025-09-30 22:26:51.944739', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.009819', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.18217556178569794, 'timestamp': '2025-09-30 22:26:52.015691', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:52.081883', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.1878412812948227, 'timestamp': '2025-09-30 22:26:52.096422', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.154681', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.20402750372886658, 'timestamp': '2025-09-30 22:26:52.157663', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.229944', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.16307152807712555, 'timestamp': '2025-09-30 22:26:52.235111', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:52.312678', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.1645486205816269, 'timestamp': '2025-09-30 22:26:52.315574', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:52.373518', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.1434979885816574, 'timestamp': '2025-09-30 22:26:52.380234', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.438740', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.08927659690380096, 'timestamp': '2025-09-30 22:26:52.443958', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:52.501615', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.07227902114391327, 'timestamp': '2025-09-30 22:26:52.508524', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:52.577837', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.07278284430503845, 'timestamp': '2025-09-30 22:26:52.583257', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:52.643960', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.14951598644256592, 'timestamp': '2025-09-30 22:26:52.654363', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:26:52.711465', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.1723862588405609, 'timestamp': '2025-09-30 22:26:52.715281', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:52.777687', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.17039038240909576, 'timestamp': '2025-09-30 22:26:52.780252', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.841480', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.22950945794582367, 'timestamp': '2025-09-30 22:26:52.847566', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:52.914293', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.11256205290555954, 'timestamp': '2025-09-30 22:26:52.923256', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:52.987288', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.1632634699344635, 'timestamp': '2025-09-30 22:26:52.992770', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:53.053562', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.11998073756694794, 'timestamp': '2025-09-30 22:26:53.060647', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:53.118107', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.2013605684041977, 'timestamp': '2025-09-30 22:26:53.125066', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:53.194107', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.09692951291799545, 'timestamp': '2025-09-30 22:26:53.200886', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:53.274896', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.1169910654425621, 'timestamp': '2025-09-30 22:26:53.277798', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:53.350681', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.0734754353761673, 'timestamp': '2025-09-30 22:26:53.353306', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:53.416697', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.1273612082004547, 'timestamp': '2025-09-30 22:26:53.421971', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:53.480555', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.14253386855125427, 'timestamp': '2025-09-30 22:26:53.487465', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:53.552047', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.09754280745983124, 'timestamp': '2025-09-30 22:26:53.560231', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:53.631579', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.13443100452423096, 'timestamp': '2025-09-30 22:26:53.641526', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:53.708649', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.21276968717575073, 'timestamp': '2025-09-30 22:26:53.713740', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:53.778453', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.15327119827270508, 'timestamp': '2025-09-30 22:26:53.787115', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:53.845115', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.1414346545934677, 'timestamp': '2025-09-30 22:26:53.847347', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:53.905364', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.129095658659935, 'timestamp': '2025-09-30 22:26:53.915530', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:53.986749', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.07045940309762955, 'timestamp': '2025-09-30 22:26:53.992750', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:54.065896', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.23309653997421265, 'timestamp': '2025-09-30 22:26:54.072706', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:54.134997', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.16762201488018036, 'timestamp': '2025-09-30 22:26:54.143366', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:54.204618', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.18776658177375793, 'timestamp': '2025-09-30 22:26:54.207547', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:54.264405', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.07117511332035065, 'timestamp': '2025-09-30 22:26:54.266888', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:54.323984', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.09656649082899094, 'timestamp': '2025-09-30 22:26:54.330225', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:26:54.388586', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.10964510589838028, 'timestamp': '2025-09-30 22:26:54.397581', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:54.457643', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.0922548770904541, 'timestamp': '2025-09-30 22:26:54.463250', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:54.520240', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.11898213624954224, 'timestamp': '2025-09-30 22:26:54.523445', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:26:54.587498', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.09038365632295609, 'timestamp': '2025-09-30 22:26:54.593422', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:54.651793', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.06339407712221146, 'timestamp': '2025-09-30 22:26:54.654158', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:54.710585', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.08673576265573502, 'timestamp': '2025-09-30 22:26:54.713548', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:54.775622', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.16642074286937714, 'timestamp': '2025-09-30 22:26:54.779853', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:26:54.851142', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.07090864330530167, 'timestamp': '2025-09-30 22:26:54.857520', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:54.913170', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.20861323177814484, 'timestamp': '2025-09-30 22:26:54.915569', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:54.972700', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.12942957878112793, 'timestamp': '2025-09-30 22:26:54.977669', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:26:55.037617', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.10275125503540039, 'timestamp': '2025-09-30 22:26:55.041538', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:55.102571', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.20328481495380402, 'timestamp': '2025-09-30 22:26:55.108815', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:26:55.175221', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.22782078385353088, 'timestamp': '2025-09-30 22:26:55.179887', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:26:55.245673', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.09168804436922073, 'timestamp': '2025-09-30 22:26:55.248356', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:27:10.395805', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 11863.305212529905, 'timestamp': '2025-09-30 22:27:10.399557', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:10.458142', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.11537090688943863, 'timestamp': '2025-09-30 22:27:10.465087', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:10.529808', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.07721883058547974, 'timestamp': '2025-09-30 22:27:10.542536', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:10.605483', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.12719178199768066, 'timestamp': '2025-09-30 22:27:10.615004', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:10.685639', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.0726914331316948, 'timestamp': '2025-09-30 22:27:10.689799', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:10.761335', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.12566982209682465, 'timestamp': '2025-09-30 22:27:10.767990', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:10.840114', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.21197588741779327, 'timestamp': '2025-09-30 22:27:10.846644', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:10.916248', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.09732478857040405, 'timestamp': '2025-09-30 22:27:10.925545', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:10.986683', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.17633529007434845, 'timestamp': '2025-09-30 22:27:10.993185', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.056479', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.1184249296784401, 'timestamp': '2025-09-30 22:27:11.059164', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:11.118873', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.12453943490982056, 'timestamp': '2025-09-30 22:27:11.125376', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:11.181750', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.12554770708084106, 'timestamp': '2025-09-30 22:27:11.185257', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:11.241845', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.0791998878121376, 'timestamp': '2025-09-30 22:27:11.244922', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:11.302276', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.09078850597143173, 'timestamp': '2025-09-30 22:27:11.304883', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.365796', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.1562398225069046, 'timestamp': '2025-09-30 22:27:11.375462', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:11.431566', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.13620144128799438, 'timestamp': '2025-09-30 22:27:11.435049', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.501859', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.11425279080867767, 'timestamp': '2025-09-30 22:27:11.512023', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.569527', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.15423095226287842, 'timestamp': '2025-09-30 22:27:11.572459', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:11.639247', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.10118380188941956, 'timestamp': '2025-09-30 22:27:11.645598', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.708005', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.16404402256011963, 'timestamp': '2025-09-30 22:27:11.711005', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:11.775476', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.1132727563381195, 'timestamp': '2025-09-30 22:27:11.778678', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.838345', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.073857381939888, 'timestamp': '2025-09-30 22:27:11.841485', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:11.904166', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.0977330207824707, 'timestamp': '2025-09-30 22:27:11.911089', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:11.968749', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.16796967387199402, 'timestamp': '2025-09-30 22:27:11.971938', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:12.033033', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.09570230543613434, 'timestamp': '2025-09-30 22:27:12.041022', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:12.099258', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.20649144053459167, 'timestamp': '2025-09-30 22:27:12.102058', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:12.159659', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.0545743890106678, 'timestamp': '2025-09-30 22:27:12.165743', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:12.223156', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.07351689785718918, 'timestamp': '2025-09-30 22:27:12.225846', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:12.282787', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.05156935006380081, 'timestamp': '2025-09-30 22:27:12.285291', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:12.342172', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.12694936990737915, 'timestamp': '2025-09-30 22:27:12.344627', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:12.405639', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.12167923152446747, 'timestamp': '2025-09-30 22:27:12.411723', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:12.468177', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.11741829663515091, 'timestamp': '2025-09-30 22:27:12.472188', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:12.531047', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.09617488086223602, 'timestamp': '2025-09-30 22:27:12.534001', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:12.591680', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.14343413710594177, 'timestamp': '2025-09-30 22:27:12.594094', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:12.651786', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.15057721734046936, 'timestamp': '2025-09-30 22:27:12.658418', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:12.715301', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.1416531801223755, 'timestamp': '2025-09-30 22:27:12.718181', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:12.776392', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.18643973767757416, 'timestamp': '2025-09-30 22:27:12.784339', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:12.844862', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.12246660143136978, 'timestamp': '2025-09-30 22:27:12.847594', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:27:12.905498', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.3175635039806366, 'timestamp': '2025-09-30 22:27:12.915079', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:12.975466', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.1764463186264038, 'timestamp': '2025-09-30 22:27:12.978308', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:13.040573', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.2102530598640442, 'timestamp': '2025-09-30 22:27:13.043737', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:13.101122', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.16192197799682617, 'timestamp': '2025-09-30 22:27:13.103953', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.162068', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.1139773279428482, 'timestamp': '2025-09-30 22:27:13.168256', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.224915', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.18503117561340332, 'timestamp': '2025-09-30 22:27:13.230771', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:13.287485', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.08653485774993896, 'timestamp': '2025-09-30 22:27:13.289914', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.346722', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.07743371278047562, 'timestamp': '2025-09-30 22:27:13.350368', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.407740', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.09864859282970428, 'timestamp': '2025-09-30 22:27:13.419601', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.477296', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.14268989861011505, 'timestamp': '2025-09-30 22:27:13.481173', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.540077', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.14683449268341064, 'timestamp': '2025-09-30 22:27:13.542915', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.607629', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.1096484586596489, 'timestamp': '2025-09-30 22:27:13.612281', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:13.672246', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.17887869477272034, 'timestamp': '2025-09-30 22:27:13.679013', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.736087', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.2276320904493332, 'timestamp': '2025-09-30 22:27:13.738567', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.795245', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.1678597629070282, 'timestamp': '2025-09-30 22:27:13.799844', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.859271', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.12956684827804565, 'timestamp': '2025-09-30 22:27:13.861761', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:13.922326', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.07763515412807465, 'timestamp': '2025-09-30 22:27:13.930260', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:13.986848', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.0991532951593399, 'timestamp': '2025-09-30 22:27:13.992033', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:14.051653', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.1921774297952652, 'timestamp': '2025-09-30 22:27:14.057115', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:14.122428', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.18200811743736267, 'timestamp': '2025-09-30 22:27:14.125021', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:14.191730', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.22635522484779358, 'timestamp': '2025-09-30 22:27:14.200583', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:14.271573', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.15083269774913788, 'timestamp': '2025-09-30 22:27:14.274342', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:14.344630', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.15341295301914215, 'timestamp': '2025-09-30 22:27:14.348053', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:14.406671', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.14043503999710083, 'timestamp': '2025-09-30 22:27:14.411035', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:14.467661', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.11209850758314133, 'timestamp': '2025-09-30 22:27:14.473640', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:14.531922', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.13722622394561768, 'timestamp': '2025-09-30 22:27:14.534563', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:14.592356', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.16174393892288208, 'timestamp': '2025-09-30 22:27:14.595540', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:14.653603', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.19500108063220978, 'timestamp': '2025-09-30 22:27:14.657558', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:14.714794', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.19117841124534607, 'timestamp': '2025-09-30 22:27:14.721275', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:14.782909', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.1868903487920761, 'timestamp': '2025-09-30 22:27:14.785283', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:14.856189', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.10227036476135254, 'timestamp': '2025-09-30 22:27:14.868553', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:14.925625', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.1597285121679306, 'timestamp': '2025-09-30 22:27:14.928172', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:14.986573', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.161357119679451, 'timestamp': '2025-09-30 22:27:14.993517', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.049338', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.07318564504384995, 'timestamp': '2025-09-30 22:27:15.051972', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:15.108040', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.09960076957941055, 'timestamp': '2025-09-30 22:27:15.111174', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:15.168054', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.18155579268932343, 'timestamp': '2025-09-30 22:27:15.170503', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:15.230286', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.08593179285526276, 'timestamp': '2025-09-30 22:27:15.238252', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.308732', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.14810672402381897, 'timestamp': '2025-09-30 22:27:15.312184', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:15.370070', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.08471814543008804, 'timestamp': '2025-09-30 22:27:15.376215', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.436744', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.14841802418231964, 'timestamp': '2025-09-30 22:27:15.443008', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.502955', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.11970152705907822, 'timestamp': '2025-09-30 22:27:15.509433', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:15.568408', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.10552651435136795, 'timestamp': '2025-09-30 22:27:15.571056', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.633954', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.12197518348693848, 'timestamp': '2025-09-30 22:27:15.637540', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:15.702484', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.12645986676216125, 'timestamp': '2025-09-30 22:27:15.705543', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:15.777609', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.08284676820039749, 'timestamp': '2025-09-30 22:27:15.785156', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:15.841465', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.16703225672245026, 'timestamp': '2025-09-30 22:27:15.845415', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:15.909782', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.14719943702220917, 'timestamp': '2025-09-30 22:27:15.912116', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:15.987179', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.23906290531158447, 'timestamp': '2025-09-30 22:27:15.991199', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:16.049483', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.20475421845912933, 'timestamp': '2025-09-30 22:27:16.056002', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:16.114506', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.13162337243556976, 'timestamp': '2025-09-30 22:27:16.119006', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:16.184859', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.13347427546977997, 'timestamp': '2025-09-30 22:27:16.187742', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:16.245054', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.1541355401277542, 'timestamp': '2025-09-30 22:27:16.251397', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:16.311979', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.2082408368587494, 'timestamp': '2025-09-30 22:27:16.323676', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:16.381564', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.1526760756969452, 'timestamp': '2025-09-30 22:27:16.384095', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:16.442651', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.26510801911354065, 'timestamp': '2025-09-30 22:27:16.447043', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:16.505745', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.14573398232460022, 'timestamp': '2025-09-30 22:27:16.509436', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:16.579069', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.099259153008461, 'timestamp': '2025-09-30 22:27:16.585381', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:16.643456', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.17445063591003418, 'timestamp': '2025-09-30 22:27:16.646738', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:16.705579', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.11711150407791138, 'timestamp': '2025-09-30 22:27:16.708862', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:16.766966', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.16571448743343353, 'timestamp': '2025-09-30 22:27:16.769701', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:16.826331', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.07907947897911072, 'timestamp': '2025-09-30 22:27:16.834327', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:16.891936', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.16629621386528015, 'timestamp': '2025-09-30 22:27:16.895305', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:16.952623', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.1400248110294342, 'timestamp': '2025-09-30 22:27:16.954805', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.020767', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.0855979472398758, 'timestamp': '2025-09-30 22:27:17.024379', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:17.083136', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.1993061751127243, 'timestamp': '2025-09-30 22:27:17.089949', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.147317', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.10410015285015106, 'timestamp': '2025-09-30 22:27:17.149868', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:17.208091', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.0847969725728035, 'timestamp': '2025-09-30 22:27:17.210935', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.275646', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.1670520007610321, 'timestamp': '2025-09-30 22:27:17.283927', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.349820', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.13398344814777374, 'timestamp': '2025-09-30 22:27:17.357414', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.426664', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.10248026996850967, 'timestamp': '2025-09-30 22:27:17.430269', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:17.494542', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.0822002962231636, 'timestamp': '2025-09-30 22:27:17.497974', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.560002', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.23296165466308594, 'timestamp': '2025-09-30 22:27:17.564229', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.621617', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.09861256927251816, 'timestamp': '2025-09-30 22:27:17.633033', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.690176', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.0944005697965622, 'timestamp': '2025-09-30 22:27:17.693120', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.750677', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.1129756048321724, 'timestamp': '2025-09-30 22:27:17.753774', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:17.818922', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.12444721907377243, 'timestamp': '2025-09-30 22:27:17.822634', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:17.882785', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.136820450425148, 'timestamp': '2025-09-30 22:27:17.895496', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:17.961775', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.18172670900821686, 'timestamp': '2025-09-30 22:27:17.964631', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:18.030065', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.17278032004833221, 'timestamp': '2025-09-30 22:27:18.033384', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:18.119593', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.12745553255081177, 'timestamp': '2025-09-30 22:27:18.122382', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:18.179661', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.21089038252830505, 'timestamp': '2025-09-30 22:27:18.186432', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:18.260465', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.024329014122486115, 'timestamp': '2025-09-30 22:27:18.263735', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:18.328187', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.13587002456188202, 'timestamp': '2025-09-30 22:27:18.339452', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:18.399767', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.13227872550487518, 'timestamp': '2025-09-30 22:27:18.404551', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:18.463048', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.1648421287536621, 'timestamp': '2025-09-30 22:27:18.477023', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:18.535090', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.1080586165189743, 'timestamp': '2025-09-30 22:27:18.538316', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:18.596247', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.14597110450267792, 'timestamp': '2025-09-30 22:27:18.604281', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:18.675190', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.08613517880439758, 'timestamp': '2025-09-30 22:27:18.678525', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:18.736661', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.18301263451576233, 'timestamp': '2025-09-30 22:27:18.744837', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:18.801373', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.13604635000228882, 'timestamp': '2025-09-30 22:27:18.806218', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:18.871895', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.07475868612527847, 'timestamp': '2025-09-30 22:27:18.875196', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:18.932182', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.1377808302640915, 'timestamp': '2025-09-30 22:27:18.935364', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:18.992235', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.14887508749961853, 'timestamp': '2025-09-30 22:27:18.999294', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.055462', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.12092514336109161, 'timestamp': '2025-09-30 22:27:19.058433', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.120047', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.14430086314678192, 'timestamp': '2025-09-30 22:27:19.123033', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.179782', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.13880160450935364, 'timestamp': '2025-09-30 22:27:19.182747', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.240290', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.09259206056594849, 'timestamp': '2025-09-30 22:27:19.248531', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.306161', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.13756375014781952, 'timestamp': '2025-09-30 22:27:19.314492', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:19.379386', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.12533056735992432, 'timestamp': '2025-09-30 22:27:19.387405', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:19.450966', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.18824291229248047, 'timestamp': '2025-09-30 22:27:19.454434', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:19.517330', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.1165517121553421, 'timestamp': '2025-09-30 22:27:19.524321', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:19.580433', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.2006223201751709, 'timestamp': '2025-09-30 22:27:19.584071', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:19.642901', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.11271866410970688, 'timestamp': '2025-09-30 22:27:19.646317', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.705324', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.0834517627954483, 'timestamp': '2025-09-30 22:27:19.708734', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:19.774337', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.18441621959209442, 'timestamp': '2025-09-30 22:27:19.780293', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:19.837074', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.08103856444358826, 'timestamp': '2025-09-30 22:27:19.843028', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:19.924778', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.2642044425010681, 'timestamp': '2025-09-30 22:27:19.927378', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:19.989896', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.10255815833806992, 'timestamp': '2025-09-30 22:27:19.993463', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:20.050287', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.11028706282377243, 'timestamp': '2025-09-30 22:27:20.056466', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:20.112910', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.1136244386434555, 'timestamp': '2025-09-30 22:27:20.121972', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:20.182775', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.18541033565998077, 'timestamp': '2025-09-30 22:27:20.185221', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:20.243790', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.08325649052858353, 'timestamp': '2025-09-30 22:27:20.246017', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:20.303789', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.1410517692565918, 'timestamp': '2025-09-30 22:27:20.309808', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:20.366327', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.18028469383716583, 'timestamp': '2025-09-30 22:27:20.369863', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:20.426173', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.07740577310323715, 'timestamp': '2025-09-30 22:27:20.429233', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:20.488278', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.12761835753917694, 'timestamp': '2025-09-30 22:27:20.493115', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:20.550513', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.07800602912902832, 'timestamp': '2025-09-30 22:27:20.558975', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:20.615885', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.20402051508426666, 'timestamp': '2025-09-30 22:27:20.618377', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:20.675663', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.07641241699457169, 'timestamp': '2025-09-30 22:27:20.680088', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:20.738953', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.16771763563156128, 'timestamp': '2025-09-30 22:27:20.741520', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:20.799635', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.15877355635166168, 'timestamp': '2025-09-30 22:27:20.806508', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:20.862837', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.18794769048690796, 'timestamp': '2025-09-30 22:27:20.867652', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:20.924320', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.17731423676013947, 'timestamp': '2025-09-30 22:27:20.927468', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:20.984352', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.07999873906373978, 'timestamp': '2025-09-30 22:27:20.987193', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:21.055692', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.14994092285633087, 'timestamp': '2025-09-30 22:27:21.061553', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.117786', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.14353030920028687, 'timestamp': '2025-09-30 22:27:21.121880', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:21.180386', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.12888944149017334, 'timestamp': '2025-09-30 22:27:21.183408', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:21.242861', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.1532253623008728, 'timestamp': '2025-09-30 22:27:21.246171', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:21.305607', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.1096850112080574, 'timestamp': '2025-09-30 22:27:21.317756', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:21.375589', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.07845202833414078, 'timestamp': '2025-09-30 22:27:21.378155', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.446525', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.2348884642124176, 'timestamp': '2025-09-30 22:27:21.449836', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.507840', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.1972460150718689, 'timestamp': '2025-09-30 22:27:21.512869', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:21.585787', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.09360535442829132, 'timestamp': '2025-09-30 22:27:21.595917', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.651524', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.14187707006931305, 'timestamp': '2025-09-30 22:27:21.655567', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:21.718845', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.12620076537132263, 'timestamp': '2025-09-30 22:27:21.721737', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.782124', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.08099772036075592, 'timestamp': '2025-09-30 22:27:21.784584', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:21.844214', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.17073076963424683, 'timestamp': '2025-09-30 22:27:21.850370', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.907095', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.14643247425556183, 'timestamp': '2025-09-30 22:27:21.913261', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:21.977025', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.17792800068855286, 'timestamp': '2025-09-30 22:27:21.980010', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.043522', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.08675634860992432, 'timestamp': '2025-09-30 22:27:22.046186', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:22.103563', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.09731979668140411, 'timestamp': '2025-09-30 22:27:22.110470', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:22.167593', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.212917298078537, 'timestamp': '2025-09-30 22:27:22.173501', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.232053', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.0837860107421875, 'timestamp': '2025-09-30 22:27:22.234952', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:22.297314', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.07773137092590332, 'timestamp': '2025-09-30 22:27:22.302756', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.369950', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.2286849021911621, 'timestamp': '2025-09-30 22:27:22.376780', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:22.433245', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.0651722401380539, 'timestamp': '2025-09-30 22:27:22.438990', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:22.499602', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.1259237676858902, 'timestamp': '2025-09-30 22:27:22.507246', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.568435', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.16030018031597137, 'timestamp': '2025-09-30 22:27:22.571219', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:22.631171', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.11136237531900406, 'timestamp': '2025-09-30 22:27:22.642458', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:22.700351', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.23452246189117432, 'timestamp': '2025-09-30 22:27:22.703301', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:22.761548', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.11819159239530563, 'timestamp': '2025-09-30 22:27:22.764735', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.822097', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.1540394276380539, 'timestamp': '2025-09-30 22:27:22.824985', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:22.884342', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.1565794199705124, 'timestamp': '2025-09-30 22:27:22.890843', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:22.947928', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.1666378527879715, 'timestamp': '2025-09-30 22:27:22.953413', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:23.011672', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.08386167138814926, 'timestamp': '2025-09-30 22:27:23.014695', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:23.073011', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.14744441211223602, 'timestamp': '2025-09-30 22:27:23.075653', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:23.137529', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.11525562405586243, 'timestamp': '2025-09-30 22:27:23.145009', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:23.208797', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.23334401845932007, 'timestamp': '2025-09-30 22:27:23.215528', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:23.280591', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.07570146024227142, 'timestamp': '2025-09-30 22:27:23.292021', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:23.357184', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.10732980072498322, 'timestamp': '2025-09-30 22:27:23.360189', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:23.419087', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.11242277175188065, 'timestamp': '2025-09-30 22:27:23.431186', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:23.488150', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.1347656100988388, 'timestamp': '2025-09-30 22:27:23.490634', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:23.548366', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.08666209876537323, 'timestamp': '2025-09-30 22:27:23.557327', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:23.617805', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.14533135294914246, 'timestamp': '2025-09-30 22:27:23.620666', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:23.677787', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.16885992884635925, 'timestamp': '2025-09-30 22:27:23.685504', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:23.744725', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.17802558839321136, 'timestamp': '2025-09-30 22:27:23.754339', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:23.813108', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.16635578870773315, 'timestamp': '2025-09-30 22:27:23.818739', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:23.885121', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.17522476613521576, 'timestamp': '2025-09-30 22:27:23.888354', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:23.951699', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.1435558795928955, 'timestamp': '2025-09-30 22:27:23.959013', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:24.023725', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.11831901967525482, 'timestamp': '2025-09-30 22:27:24.026223', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:24.082776', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.09537768363952637, 'timestamp': '2025-09-30 22:27:24.086019', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:24.142716', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.12948091328144073, 'timestamp': '2025-09-30 22:27:24.146216', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:24.209068', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.16630539298057556, 'timestamp': '2025-09-30 22:27:24.215372', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:24.276359', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.045688092708587646, 'timestamp': '2025-09-30 22:27:24.283214', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:24.340296', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.08716277778148651, 'timestamp': '2025-09-30 22:27:24.343673', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:24.401891', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.12843845784664154, 'timestamp': '2025-09-30 22:27:24.404970', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:24.461047', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.11020959913730621, 'timestamp': '2025-09-30 22:27:24.467060', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-09-30 22:27:24.891324', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:24.948525', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.16307169198989868, 'timestamp': '2025-09-30 22:27:24.951533', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:25.008079', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.08084561675786972, 'timestamp': '2025-09-30 22:27:25.011469', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.075764', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.07819051295518875, 'timestamp': '2025-09-30 22:27:25.078925', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.136149', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.13572585582733154, 'timestamp': '2025-09-30 22:27:25.142908', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.199284', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.13390745222568512, 'timestamp': '2025-09-30 22:27:25.204640', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:25.261384', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.11559025198221207, 'timestamp': '2025-09-30 22:27:25.265190', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:25.322926', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.16559812426567078, 'timestamp': '2025-09-30 22:27:25.329868', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:25.393972', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.24464809894561768, 'timestamp': '2025-09-30 22:27:25.400782', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:25.457249', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.13836973905563354, 'timestamp': '2025-09-30 22:27:25.461164', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:25.517932', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.07941821962594986, 'timestamp': '2025-09-30 22:27:25.521169', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.582854', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.14956294000148773, 'timestamp': '2025-09-30 22:27:25.586515', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.656644', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.063447006046772, 'timestamp': '2025-09-30 22:27:25.666762', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:25.722939', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.17898370325565338, 'timestamp': '2025-09-30 22:27:25.725554', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:25.782196', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.16413399577140808, 'timestamp': '2025-09-30 22:27:25.785180', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:25.846683', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.12528447806835175, 'timestamp': '2025-09-30 22:27:25.849692', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:25.907710', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.10129986703395844, 'timestamp': '2025-09-30 22:27:25.918373', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:25.975153', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.08534915000200272, 'timestamp': '2025-09-30 22:27:25.977593', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:26.034798', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.13297338783740997, 'timestamp': '2025-09-30 22:27:26.037896', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:26.104887', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.05893482267856598, 'timestamp': '2025-09-30 22:27:26.108420', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:26.169595', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.14995017647743225, 'timestamp': '2025-09-30 22:27:26.176024', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:26.235070', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.08211839944124222, 'timestamp': '2025-09-30 22:27:26.243287', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:26.302173', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.03924717381596565, 'timestamp': '2025-09-30 22:27:26.305736', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:26.363449', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.08397294580936432, 'timestamp': '2025-09-30 22:27:26.366620', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:26.435246', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.03571010380983353, 'timestamp': '2025-09-30 22:27:26.443035', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:26.513221', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.12299872934818268, 'timestamp': '2025-09-30 22:27:26.531279', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:26.588498', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.09204509854316711, 'timestamp': '2025-09-30 22:27:26.592341', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:26.657298', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.11709859222173691, 'timestamp': '2025-09-30 22:27:26.660033', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:26.720400', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.12897977232933044, 'timestamp': '2025-09-30 22:27:26.738565', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:26.796094', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.09451408684253693, 'timestamp': '2025-09-30 22:27:26.806022', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:26.867500', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.106675885617733, 'timestamp': '2025-09-30 22:27:26.870057', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:26.927260', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.19339464604854584, 'timestamp': '2025-09-30 22:27:26.930146', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:26.986558', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.11110764741897583, 'timestamp': '2025-09-30 22:27:26.993062', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:27.053638', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.07508288323879242, 'timestamp': '2025-09-30 22:27:27.056268', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:27.114130', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.08455885201692581, 'timestamp': '2025-09-30 22:27:27.116533', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:27:27.174838', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.10084040462970734, 'timestamp': '2025-09-30 22:27:27.181358', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:27.242614', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.07250282913446426, 'timestamp': '2025-09-30 22:27:27.252760', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:27.314603', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.09903917461633682, 'timestamp': '2025-09-30 22:27:27.321206', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:27.388341', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.18295370042324066, 'timestamp': '2025-09-30 22:27:27.396055', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:27.463933', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.15439854562282562, 'timestamp': '2025-09-30 22:27:27.470952', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:27.530939', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.061553049832582474, 'timestamp': '2025-09-30 22:27:27.546689', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:27.604946', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.13263459503650665, 'timestamp': '2025-09-30 22:27:27.611181', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:27.678553', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.18252688646316528, 'timestamp': '2025-09-30 22:27:27.681921', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:27.745478', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.14361223578453064, 'timestamp': '2025-09-30 22:27:27.748053', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:27.809295', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.12266318500041962, 'timestamp': '2025-09-30 22:27:27.817481', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:27.876473', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.1403467357158661, 'timestamp': '2025-09-30 22:27:27.879984', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:27.938276', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.1087164431810379, 'timestamp': '2025-09-30 22:27:27.941060', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.011782', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.15323710441589355, 'timestamp': '2025-09-30 22:27:28.019504', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.077157', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.14383625984191895, 'timestamp': '2025-09-30 22:27:28.084834', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.140483', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.11574284732341766, 'timestamp': '2025-09-30 22:27:28.144196', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:28.201198', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.08929052948951721, 'timestamp': '2025-09-30 22:27:28.207822', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:28.266652', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.13347725570201874, 'timestamp': '2025-09-30 22:27:28.271754', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.330301', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.09350740164518356, 'timestamp': '2025-09-30 22:27:28.337559', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:28.395548', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.15133483707904816, 'timestamp': '2025-09-30 22:27:28.398629', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.456170', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.11664198338985443, 'timestamp': '2025-09-30 22:27:28.459073', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.533584', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.043942805379629135, 'timestamp': '2025-09-30 22:27:28.536498', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.606125', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.19306398928165436, 'timestamp': '2025-09-30 22:27:28.617842', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.674714', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.048509735614061356, 'timestamp': '2025-09-30 22:27:28.677518', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:28.734457', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.07863881438970566, 'timestamp': '2025-09-30 22:27:28.738598', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.796876', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.11908308416604996, 'timestamp': '2025-09-30 22:27:28.804630', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:28.863601', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.14265176653862, 'timestamp': '2025-09-30 22:27:28.875267', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:28.937084', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.08552452176809311, 'timestamp': '2025-09-30 22:27:28.940626', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:28.998201', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.10121569037437439, 'timestamp': '2025-09-30 22:27:29.001213', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:29.059611', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.11668425798416138, 'timestamp': '2025-09-30 22:27:29.068639', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:29.133090', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.08172967284917831, 'timestamp': '2025-09-30 22:27:29.143144', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:29.204525', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.09994765371084213, 'timestamp': '2025-09-30 22:27:29.207545', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:29.271365', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.16836819052696228, 'timestamp': '2025-09-30 22:27:29.279774', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:29.341258', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.09772376716136932, 'timestamp': '2025-09-30 22:27:29.346555', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:29.407948', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.053019218146800995, 'timestamp': '2025-09-30 22:27:29.416275', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:29.479065', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.18314571678638458, 'timestamp': '2025-09-30 22:27:29.482262', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:29.552939', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.1535775363445282, 'timestamp': '2025-09-30 22:27:29.561043', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:29.623289', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.08370078355073929, 'timestamp': '2025-09-30 22:27:29.626193', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:29.683926', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.09368951618671417, 'timestamp': '2025-09-30 22:27:29.690675', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:29.753123', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.12189112603664398, 'timestamp': '2025-09-30 22:27:29.756530', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:29.812955', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.09698180854320526, 'timestamp': '2025-09-30 22:27:29.815848', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:29.872956', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.12265752255916595, 'timestamp': '2025-09-30 22:27:29.881931', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:29.939513', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.15558962523937225, 'timestamp': '2025-09-30 22:27:29.945362', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:30.001474', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.2475416660308838, 'timestamp': '2025-09-30 22:27:30.005201', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.061763', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.08178794384002686, 'timestamp': '2025-09-30 22:27:30.064750', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:30.126684', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.1694425642490387, 'timestamp': '2025-09-30 22:27:30.130660', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:30.188598', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.12745775282382965, 'timestamp': '2025-09-30 22:27:30.196142', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:30.255652', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.09224579483270645, 'timestamp': '2025-09-30 22:27:30.258439', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.316604', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.07020685821771622, 'timestamp': '2025-09-30 22:27:30.319773', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:30.383561', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.12590721249580383, 'timestamp': '2025-09-30 22:27:30.386842', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.443975', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1585257202386856, 'timestamp': '2025-09-30 22:27:30.450155', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:30.523423', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.10229285061359406, 'timestamp': '2025-09-30 22:27:30.525995', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:30.589444', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.10323264449834824, 'timestamp': '2025-09-30 22:27:30.592195', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.649579', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.12359391152858734, 'timestamp': '2025-09-30 22:27:30.652567', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:30.710021', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.10249019414186478, 'timestamp': '2025-09-30 22:27:30.716827', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:30.775579', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.1049548014998436, 'timestamp': '2025-09-30 22:27:30.778559', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.854171', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.05081154406070709, 'timestamp': '2025-09-30 22:27:30.857490', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:30.916093', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.14680248498916626, 'timestamp': '2025-09-30 22:27:30.919638', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:30.977432', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.07943674921989441, 'timestamp': '2025-09-30 22:27:30.984446', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.043230', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.18946711719036102, 'timestamp': '2025-09-30 22:27:31.050527', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:31.109599', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.12610173225402832, 'timestamp': '2025-09-30 22:27:31.112787', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.173612', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.11568503826856613, 'timestamp': '2025-09-30 22:27:31.176845', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.235679', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.1347428858280182, 'timestamp': '2025-09-30 22:27:31.242245', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:31.303765', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.0911281406879425, 'timestamp': '2025-09-30 22:27:31.306389', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:31.364795', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.11357202380895615, 'timestamp': '2025-09-30 22:27:31.367949', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.426640', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.11529050767421722, 'timestamp': '2025-09-30 22:27:31.434822', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:31.493065', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.11354262381792068, 'timestamp': '2025-09-30 22:27:31.503086', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:31.561738', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.18125222623348236, 'timestamp': '2025-09-30 22:27:31.564425', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:31.632094', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.14130094647407532, 'timestamp': '2025-09-30 22:27:31.634769', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:31.702650', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.09426049143075943, 'timestamp': '2025-09-30 22:27:31.706462', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.764605', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.060802146792411804, 'timestamp': '2025-09-30 22:27:31.775662', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:31.832532', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.10412376374006271, 'timestamp': '2025-09-30 22:27:31.835563', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:31.893261', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.09310957789421082, 'timestamp': '2025-09-30 22:27:31.895815', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:31.952835', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.09237903356552124, 'timestamp': '2025-09-30 22:27:31.958768', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:32.017465', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.10148850828409195, 'timestamp': '2025-09-30 22:27:32.023989', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.085515', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.09317407011985779, 'timestamp': '2025-09-30 22:27:32.088457', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.145257', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.1639038771390915, 'timestamp': '2025-09-30 22:27:32.148213', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.206403', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.15209834277629852, 'timestamp': '2025-09-30 22:27:32.208815', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:32.270302', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.12758100032806396, 'timestamp': '2025-09-30 22:27:32.279550', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:32.337689', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.11737845093011856, 'timestamp': '2025-09-30 22:27:32.340175', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.397904', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.05106382817029953, 'timestamp': '2025-09-30 22:27:32.400260', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:32.457929', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.11683356761932373, 'timestamp': '2025-09-30 22:27:32.462358', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.521147', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.10146783292293549, 'timestamp': '2025-09-30 22:27:32.528139', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:27:32.585128', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.08123213797807693, 'timestamp': '2025-09-30 22:27:32.598317', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:32.669982', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.06887680292129517, 'timestamp': '2025-09-30 22:27:32.690797', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:32.761211', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.0821993499994278, 'timestamp': '2025-09-30 22:27:32.776336', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:32.843768', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.21146813035011292, 'timestamp': '2025-09-30 22:27:32.867598', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:32.947801', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.2347233146429062, 'timestamp': '2025-09-30 22:27:32.966618', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:33.031703', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.1262267529964447, 'timestamp': '2025-09-30 22:27:33.048306', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:33.119326', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.11024550348520279, 'timestamp': '2025-09-30 22:27:33.125602', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:33.188790', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.10498275607824326, 'timestamp': '2025-09-30 22:27:33.207563', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:33.273565', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.17625287175178528, 'timestamp': '2025-09-30 22:27:33.285476', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:33.342817', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.10080075263977051, 'timestamp': '2025-09-30 22:27:33.361114', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:33.429446', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.1792970448732376, 'timestamp': '2025-09-30 22:27:33.440793', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:33.520251', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.16669100522994995, 'timestamp': '2025-09-30 22:27:33.530990', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:33.596660', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.16613130271434784, 'timestamp': '2025-09-30 22:27:33.612832', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:33.688917', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.11285676062107086, 'timestamp': '2025-09-30 22:27:33.692114', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:33.789668', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.10898418724536896, 'timestamp': '2025-09-30 22:27:33.794994', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:33.860826', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.08280366659164429, 'timestamp': '2025-09-30 22:27:33.884204', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:33.946981', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.11297890543937683, 'timestamp': '2025-09-30 22:27:33.951560', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:34.019974', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.09229522198438644, 'timestamp': '2025-09-30 22:27:34.025938', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:34.095154', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.13963110744953156, 'timestamp': '2025-09-30 22:27:34.099301', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:27:34.162707', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.10150992125272751, 'timestamp': '2025-09-30 22:27:34.174269', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:34.231290', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.13818368315696716, 'timestamp': '2025-09-30 22:27:34.240849', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:34.305753', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.09796567261219025, 'timestamp': '2025-09-30 22:27:34.315571', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:34.387078', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.08207422494888306, 'timestamp': '2025-09-30 22:27:34.390256', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:34.467989', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.14430609345436096, 'timestamp': '2025-09-30 22:27:34.474813', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:34.537195', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.06102456897497177, 'timestamp': '2025-09-30 22:27:34.549680', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:34.613080', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.10712628811597824, 'timestamp': '2025-09-30 22:27:34.617014', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:34.674980', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.16092529892921448, 'timestamp': '2025-09-30 22:27:34.685143', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:34.744484', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.13078078627586365, 'timestamp': '2025-09-30 22:27:34.751378', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:34.813090', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.06933261454105377, 'timestamp': '2025-09-30 22:27:34.823956', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:34.886762', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.0988234430551529, 'timestamp': '2025-09-30 22:27:34.897688', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:34.956112', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.11069563776254654, 'timestamp': '2025-09-30 22:27:34.963231', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.027080', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.09999095648527145, 'timestamp': '2025-09-30 22:27:35.035357', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.101914', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.10318473726511002, 'timestamp': '2025-09-30 22:27:35.105380', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.163093', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.13131067156791687, 'timestamp': '2025-09-30 22:27:35.166956', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:35.226238', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.15986838936805725, 'timestamp': '2025-09-30 22:27:35.242497', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:35.301967', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.1831333488225937, 'timestamp': '2025-09-30 22:27:35.315484', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.376366', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.0795203372836113, 'timestamp': '2025-09-30 22:27:35.384556', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:35.448715', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.12223555147647858, 'timestamp': '2025-09-30 22:27:35.456950', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.520624', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.14067144691944122, 'timestamp': '2025-09-30 22:27:35.523750', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:35.579856', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.10390114039182663, 'timestamp': '2025-09-30 22:27:35.594757', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:35.652139', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.1104903519153595, 'timestamp': '2025-09-30 22:27:35.665870', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:35.723185', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.1327325701713562, 'timestamp': '2025-09-30 22:27:35.731871', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:35.803051', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.15313856303691864, 'timestamp': '2025-09-30 22:27:35.811409', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:35.870449', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.09309268742799759, 'timestamp': '2025-09-30 22:27:35.876972', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:35.934840', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.0725189670920372, 'timestamp': '2025-09-30 22:27:35.937224', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:35.997551', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.12798456847667694, 'timestamp': '2025-09-30 22:27:36.000927', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:36.058770', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.2556329369544983, 'timestamp': '2025-09-30 22:27:36.061500', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:36.119593', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.15058763325214386, 'timestamp': '2025-09-30 22:27:36.126758', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:36.183074', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.17496761679649353, 'timestamp': '2025-09-30 22:27:36.190677', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:36.251405', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.15649020671844482, 'timestamp': '2025-09-30 22:27:36.255010', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:36.315065', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.20418725907802582, 'timestamp': '2025-09-30 22:27:36.318777', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:36.376716', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.13358087837696075, 'timestamp': '2025-09-30 22:27:36.383329', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:36.440000', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.141043022274971, 'timestamp': '2025-09-30 22:27:36.443133', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:36.500941', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.21457622945308685, 'timestamp': '2025-09-30 22:27:36.508209', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:36.565649', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.1477169394493103, 'timestamp': '2025-09-30 22:27:36.568409', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:36.626214', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.1618862897157669, 'timestamp': '2025-09-30 22:27:36.632252', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:36.692191', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.17413927614688873, 'timestamp': '2025-09-30 22:27:36.695498', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:36.767420', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.1185513436794281, 'timestamp': '2025-09-30 22:27:36.770282', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:36.831721', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.1814502328634262, 'timestamp': '2025-09-30 22:27:36.835947', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:36.892706', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.12797681987285614, 'timestamp': '2025-09-30 22:27:36.900158', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:36.964732', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.13096289336681366, 'timestamp': '2025-09-30 22:27:36.967131', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:37.024653', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.166855588555336, 'timestamp': '2025-09-30 22:27:37.027676', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:37.085895', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.07360117882490158, 'timestamp': '2025-09-30 22:27:37.089109', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.147633', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.2673657536506653, 'timestamp': '2025-09-30 22:27:37.158697', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:37.226478', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.18738169968128204, 'timestamp': '2025-09-30 22:27:37.236764', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.301306', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.13618676364421844, 'timestamp': '2025-09-30 22:27:37.308517', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:37.369440', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.14480431377887726, 'timestamp': '2025-09-30 22:27:37.376140', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:37.437318', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.11484236270189285, 'timestamp': '2025-09-30 22:27:37.444638', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:37.503875', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.14885541796684265, 'timestamp': '2025-09-30 22:27:37.510902', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.574369', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.07583124935626984, 'timestamp': '2025-09-30 22:27:37.583455', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:37.646803', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.19074612855911255, 'timestamp': '2025-09-30 22:27:37.651472', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.709204', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.1799800544977188, 'timestamp': '2025-09-30 22:27:37.722076', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:37.778671', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.0957692340016365, 'timestamp': '2025-09-30 22:27:37.781621', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.838623', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.08824963867664337, 'timestamp': '2025-09-30 22:27:37.846394', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.905350', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.24625614285469055, 'timestamp': '2025-09-30 22:27:37.909324', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:37.968334', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.13457757234573364, 'timestamp': '2025-09-30 22:27:37.980892', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:38.039940', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.10531522333621979, 'timestamp': '2025-09-30 22:27:38.043512', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.102102', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.12216758728027344, 'timestamp': '2025-09-30 22:27:38.106825', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:38.165753', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.1692831665277481, 'timestamp': '2025-09-30 22:27:38.169252', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.235148', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.15388339757919312, 'timestamp': '2025-09-30 22:27:38.241696', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:38.305572', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.09330795705318451, 'timestamp': '2025-09-30 22:27:38.308885', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.374157', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.1482788771390915, 'timestamp': '2025-09-30 22:27:38.377423', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.434306', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.07735545188188553, 'timestamp': '2025-09-30 22:27:38.436971', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:38.499887', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.1369597315788269, 'timestamp': '2025-09-30 22:27:38.506376', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:38.563881', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.1658772975206375, 'timestamp': '2025-09-30 22:27:38.567610', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.624758', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.07443906366825104, 'timestamp': '2025-09-30 22:27:38.628071', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:38.684667', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.11992444843053818, 'timestamp': '2025-09-30 22:27:38.688400', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:38.746456', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.12565869092941284, 'timestamp': '2025-09-30 22:27:38.753402', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:38.809643', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.17234888672828674, 'timestamp': '2025-09-30 22:27:38.812522', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:38.876644', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.10719761252403259, 'timestamp': '2025-09-30 22:27:38.879822', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:38.937312', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.09285905957221985, 'timestamp': '2025-09-30 22:27:38.940468', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:39.005827', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.10235615819692612, 'timestamp': '2025-09-30 22:27:39.012471', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:27:39.076971', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.13337989151477814, 'timestamp': '2025-09-30 22:27:39.079774', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:39.139621', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.09309089928865433, 'timestamp': '2025-09-30 22:27:39.150116', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:39.208732', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.1425529271364212, 'timestamp': '2025-09-30 22:27:39.213055', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:39.269718', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.08106531947851181, 'timestamp': '2025-09-30 22:27:39.276643', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:39.333879', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.09188412874937057, 'timestamp': '2025-09-30 22:27:39.339866', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:39.404237', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.21636569499969482, 'timestamp': '2025-09-30 22:27:39.411214', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:39.469349', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.16322416067123413, 'timestamp': '2025-09-30 22:27:39.473160', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:39.535616', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.09679309278726578, 'timestamp': '2025-09-30 22:27:39.547846', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:39.604257', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.10955239087343216, 'timestamp': '2025-09-30 22:27:39.612903', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:39.674522', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.15922899544239044, 'timestamp': '2025-09-30 22:27:39.681175', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:39.743340', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.11107835173606873, 'timestamp': '2025-09-30 22:27:39.749377', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:39.809527', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.0795900747179985, 'timestamp': '2025-09-30 22:27:39.819117', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:39.879790', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.14448387920856476, 'timestamp': '2025-09-30 22:27:39.882435', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:39.942002', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.08588994294404984, 'timestamp': '2025-09-30 22:27:39.944750', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:40.002302', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.08985427767038345, 'timestamp': '2025-09-30 22:27:40.005566', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:40.062503', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.16218148171901703, 'timestamp': '2025-09-30 22:27:40.073511', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:40.130190', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.1555403470993042, 'timestamp': '2025-09-30 22:27:40.133265', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:40.195284', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.15437500178813934, 'timestamp': '2025-09-30 22:27:40.198991', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:40.256277', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.16265977919101715, 'timestamp': '2025-09-30 22:27:40.263577', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:40.321769', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.11581593006849289, 'timestamp': '2025-09-30 22:27:40.327952', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:40.384752', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.12458077073097229, 'timestamp': '2025-09-30 22:27:40.387327', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:40.444583', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.17730075120925903, 'timestamp': '2025-09-30 22:27:40.458605', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:40.520282', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.08224133402109146, 'timestamp': '2025-09-30 22:27:40.523469', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:40.586470', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.21658721566200256, 'timestamp': '2025-09-30 22:27:40.604619', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:40.661209', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.0597802996635437, 'timestamp': '2025-09-30 22:27:40.664657', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:40.722866', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.18585161864757538, 'timestamp': '2025-09-30 22:27:40.755560', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:40.814292', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.12809573113918304, 'timestamp': '2025-09-30 22:27:40.817382', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:40.874706', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.09497952461242676, 'timestamp': '2025-09-30 22:27:40.881765', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:40.939731', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.13889387249946594, 'timestamp': '2025-09-30 22:27:40.948693', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.010459', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11580918729305267, 'timestamp': '2025-09-30 22:27:41.014026', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.074671', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.10172806680202484, 'timestamp': '2025-09-30 22:27:41.086921', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.160912', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.18870587646961212, 'timestamp': '2025-09-30 22:27:41.167357', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:41.229813', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.12164636701345444, 'timestamp': '2025-09-30 22:27:41.233059', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:41.289454', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.14842963218688965, 'timestamp': '2025-09-30 22:27:41.292788', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:41.349622', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.15616609156131744, 'timestamp': '2025-09-30 22:27:41.352892', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:41.415071', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.17647042870521545, 'timestamp': '2025-09-30 22:27:41.421135', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.478484', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.0412500761449337, 'timestamp': '2025-09-30 22:27:41.480764', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.538306', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.22283022105693817, 'timestamp': '2025-09-30 22:27:41.541609', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.602373', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.14000335335731506, 'timestamp': '2025-09-30 22:27:41.605425', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.666329', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.12372546643018723, 'timestamp': '2025-09-30 22:27:41.678012', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:41.735096', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.10439489781856537, 'timestamp': '2025-09-30 22:27:41.740182', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.797231', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.19957274198532104, 'timestamp': '2025-09-30 22:27:41.799774', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:41.861429', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.09156899899244308, 'timestamp': '2025-09-30 22:27:41.864419', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:41.939335', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.3142085075378418, 'timestamp': '2025-09-30 22:27:41.946650', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:27:56.076917', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 13751.591936148678, 'timestamp': '2025-09-30 22:27:56.083576', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:56.140212', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.10721148550510406, 'timestamp': '2025-09-30 22:27:56.146178', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:56.206047', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.14277887344360352, 'timestamp': '2025-09-30 22:27:56.210748', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:56.280068', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.10545225441455841, 'timestamp': '2025-09-30 22:27:56.284282', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:56.349363', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.1062573716044426, 'timestamp': '2025-09-30 22:27:56.356322', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:56.413868', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.1502854973077774, 'timestamp': '2025-09-30 22:27:56.417609', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:27:56.476481', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.14455969631671906, 'timestamp': '2025-09-30 22:27:56.481421', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:56.539726', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.05800719931721687, 'timestamp': '2025-09-30 22:27:56.547215', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:56.607378', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.11065452545881271, 'timestamp': '2025-09-30 22:27:56.615232', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:56.672555', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.07159397006034851, 'timestamp': '2025-09-30 22:27:56.676177', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:56.735324', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.12229421734809875, 'timestamp': '2025-09-30 22:27:56.738706', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:27:56.798069', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.07587813585996628, 'timestamp': '2025-09-30 22:27:56.802877', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:56.861761', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.13849033415317535, 'timestamp': '2025-09-30 22:27:56.869661', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:56.926833', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.15339729189872742, 'timestamp': '2025-09-30 22:27:56.931264', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:56.990354', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.09223960340023041, 'timestamp': '2025-09-30 22:27:56.994431', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:57.052544', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.18861816823482513, 'timestamp': '2025-09-30 22:27:57.056182', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:57.114462', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.1769038438796997, 'timestamp': '2025-09-30 22:27:57.132049', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:57.190411', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.2646123170852661, 'timestamp': '2025-09-30 22:27:57.194811', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.252648', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.1387770026922226, 'timestamp': '2025-09-30 22:27:57.256141', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.315193', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.08198211342096329, 'timestamp': '2025-09-30 22:27:57.319075', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:57.376535', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.04016749933362007, 'timestamp': '2025-09-30 22:27:57.383404', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:27:57.441392', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.13215842843055725, 'timestamp': '2025-09-30 22:27:57.444517', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.503252', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.08715473115444183, 'timestamp': '2025-09-30 22:27:57.507984', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.566168', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.13401466608047485, 'timestamp': '2025-09-30 22:27:57.578012', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:57.645051', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.14511451125144958, 'timestamp': '2025-09-30 22:27:57.653151', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:57.709517', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.09431866556406021, 'timestamp': '2025-09-30 22:27:57.713341', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.791574', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.0896265059709549, 'timestamp': '2025-09-30 22:27:57.794745', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:57.855341', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.2654116451740265, 'timestamp': '2025-09-30 22:27:57.858516', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:57.930353', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.15336400270462036, 'timestamp': '2025-09-30 22:27:57.937826', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:57.997970', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.13132667541503906, 'timestamp': '2025-09-30 22:27:58.002511', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.077647', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.21321415901184082, 'timestamp': '2025-09-30 22:27:58.081213', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:58.139392', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.1076653003692627, 'timestamp': '2025-09-30 22:27:58.142639', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.201682', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.05188273638486862, 'timestamp': '2025-09-30 22:27:58.213343', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:58.271769', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.1245746985077858, 'timestamp': '2025-09-30 22:27:58.285511', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:58.344504', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.13435786962509155, 'timestamp': '2025-09-30 22:27:58.354192', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.436075', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.1220317855477333, 'timestamp': '2025-09-30 22:27:58.442451', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:58.508430', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.062496621161699295, 'timestamp': '2025-09-30 22:27:58.516516', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.583060', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.07238301634788513, 'timestamp': '2025-09-30 22:27:58.587504', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.662282', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.20250697433948517, 'timestamp': '2025-09-30 22:27:58.668481', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.726114', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.147549569606781, 'timestamp': '2025-09-30 22:27:58.739715', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.812569', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.11897175759077072, 'timestamp': '2025-09-30 22:27:58.820793', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:58.878369', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.07527387142181396, 'timestamp': '2025-09-30 22:27:58.880914', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:58.938451', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.09542684257030487, 'timestamp': '2025-09-30 22:27:58.942041', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:59.000514', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.05706724897027016, 'timestamp': '2025-09-30 22:27:59.009812', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.069130', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.12496128678321838, 'timestamp': '2025-09-30 22:27:59.076625', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.148049', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.15404263138771057, 'timestamp': '2025-09-30 22:27:59.152117', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.211217', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.1605977565050125, 'timestamp': '2025-09-30 22:27:59.215500', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:59.276200', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.09772776812314987, 'timestamp': '2025-09-30 22:27:59.279912', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:59.341400', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.14855767786502838, 'timestamp': '2025-09-30 22:27:59.355871', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:59.424630', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.23009341955184937, 'timestamp': '2025-09-30 22:27:59.428317', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:59.498994', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.10876476764678955, 'timestamp': '2025-09-30 22:27:59.512802', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:27:59.571089', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.11180806159973145, 'timestamp': '2025-09-30 22:27:59.583452', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:27:59.643072', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.12615495920181274, 'timestamp': '2025-09-30 22:27:59.651552', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.712743', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.09036966413259506, 'timestamp': '2025-09-30 22:27:59.716887', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.776592', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.16142027080059052, 'timestamp': '2025-09-30 22:27:59.780975', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.838420', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.10813020169734955, 'timestamp': '2025-09-30 22:27:59.850158', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:27:59.908202', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.07988106459379196, 'timestamp': '2025-09-30 22:27:59.916275', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:27:59.978978', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.10197806358337402, 'timestamp': '2025-09-30 22:27:59.983063', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.055389', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.1088080033659935, 'timestamp': '2025-09-30 22:28:00.059866', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.120540', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.12221743166446686, 'timestamp': '2025-09-30 22:28:00.123738', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:00.182412', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.11884171515703201, 'timestamp': '2025-09-30 22:28:00.189566', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:00.259401', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.10836376994848251, 'timestamp': '2025-09-30 22:28:00.274093', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:00.338984', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.10356444865465164, 'timestamp': '2025-09-30 22:28:00.343042', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:00.425290', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.14627417922019958, 'timestamp': '2025-09-30 22:28:00.429316', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.503769', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.11552921682596207, 'timestamp': '2025-09-30 22:28:00.519125', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:00.586475', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.1768387407064438, 'timestamp': '2025-09-30 22:28:00.589897', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:00.649138', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.12252301722764969, 'timestamp': '2025-09-30 22:28:00.654496', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:00.731208', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.2026520073413849, 'timestamp': '2025-09-30 22:28:00.745871', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.818092', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.18964914977550507, 'timestamp': '2025-09-30 22:28:00.825872', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.885066', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.11481032520532608, 'timestamp': '2025-09-30 22:28:00.894305', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:00.964419', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.16609379649162292, 'timestamp': '2025-09-30 22:28:00.975011', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.049830', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.03971254825592041, 'timestamp': '2025-09-30 22:28:01.053255', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.113929', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.08634267002344131, 'timestamp': '2025-09-30 22:28:01.121162', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:01.201610', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.12710393965244293, 'timestamp': '2025-09-30 22:28:01.214608', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:01.283543', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.1470564752817154, 'timestamp': '2025-09-30 22:28:01.287344', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.360268', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.14956264197826385, 'timestamp': '2025-09-30 22:28:01.364143', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.423674', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.1128929853439331, 'timestamp': '2025-09-30 22:28:01.430934', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:01.490113', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.16812889277935028, 'timestamp': '2025-09-30 22:28:01.493091', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:01.553902', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.0722888931632042, 'timestamp': '2025-09-30 22:28:01.556771', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.620679', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.11884938180446625, 'timestamp': '2025-09-30 22:28:01.624153', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.694528', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.0664043128490448, 'timestamp': '2025-09-30 22:28:01.708876', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:01.775705', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.17884723842144012, 'timestamp': '2025-09-30 22:28:01.779665', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:01.839486', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.09964784234762192, 'timestamp': '2025-09-30 22:28:01.843270', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:01.903772', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.11147354543209076, 'timestamp': '2025-09-30 22:28:01.912104', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:01.980513', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.17591151595115662, 'timestamp': '2025-09-30 22:28:01.991958', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:02.068332', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.18480157852172852, 'timestamp': '2025-09-30 22:28:02.071964', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:02.130677', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.14617739617824554, 'timestamp': '2025-09-30 22:28:02.133782', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:02.208731', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.11954917013645172, 'timestamp': '2025-09-30 22:28:02.220441', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:02.281648', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.12268178910017014, 'timestamp': '2025-09-30 22:28:02.296896', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:02.355202', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.1504475325345993, 'timestamp': '2025-09-30 22:28:02.359431', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:02.419800', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.179887592792511, 'timestamp': '2025-09-30 22:28:02.424617', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:02.496382', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.09840571135282516, 'timestamp': '2025-09-30 22:28:02.499606', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:02.558972', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.20103248953819275, 'timestamp': '2025-09-30 22:28:02.566014', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:02.626855', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.06073576211929321, 'timestamp': '2025-09-30 22:28:02.630769', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:02.693344', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.05506878346204758, 'timestamp': '2025-09-30 22:28:02.699846', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:02.762995', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.1373784840106964, 'timestamp': '2025-09-30 22:28:02.767635', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:02.828239', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.11043500155210495, 'timestamp': '2025-09-30 22:28:02.836136', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:02.897678', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.15695078670978546, 'timestamp': '2025-09-30 22:28:02.901239', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:02.962149', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.10708599537611008, 'timestamp': '2025-09-30 22:28:02.968111', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:03.042108', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.07131624966859818, 'timestamp': '2025-09-30 22:28:03.045731', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:03.105372', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.08460099995136261, 'timestamp': '2025-09-30 22:28:03.111815', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:03.174677', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.09696029126644135, 'timestamp': '2025-09-30 22:28:03.177456', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.244823', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.12197235971689224, 'timestamp': '2025-09-30 22:28:03.247726', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.309480', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.09040810912847519, 'timestamp': '2025-09-30 22:28:03.312225', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:28:03.380976', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.16351322829723358, 'timestamp': '2025-09-30 22:28:03.392314', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.449113', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.05610562488436699, 'timestamp': '2025-09-30 22:28:03.451395', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.512646', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.11969137191772461, 'timestamp': '2025-09-30 22:28:03.515393', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.579935', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.213813915848732, 'timestamp': '2025-09-30 22:28:03.583323', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:03.642478', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.11139190942049026, 'timestamp': '2025-09-30 22:28:03.654684', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.711833', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.11375314742326736, 'timestamp': '2025-09-30 22:28:03.716058', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.792147', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.11097639799118042, 'timestamp': '2025-09-30 22:28:03.796853', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:03.872061', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.17620746791362762, 'timestamp': '2025-09-30 22:28:03.874836', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:03.934060', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.05442250519990921, 'timestamp': '2025-09-30 22:28:03.940768', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:03.999212', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.15451939404010773, 'timestamp': '2025-09-30 22:28:04.002680', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:04.076119', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.2158132791519165, 'timestamp': '2025-09-30 22:28:04.079068', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.147075', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.06893949210643768, 'timestamp': '2025-09-30 22:28:04.152360', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.211723', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.1362556368112564, 'timestamp': '2025-09-30 22:28:04.219182', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:04.276560', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.09811840951442719, 'timestamp': '2025-09-30 22:28:04.282920', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.341134', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.10274729877710342, 'timestamp': '2025-09-30 22:28:04.351013', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.409158', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.09376313537359238, 'timestamp': '2025-09-30 22:28:04.412498', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:04.470028', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.14486925303936005, 'timestamp': '2025-09-30 22:28:04.476772', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:04.541303', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.10272163152694702, 'timestamp': '2025-09-30 22:28:04.544229', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:04.607531', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.1272554099559784, 'timestamp': '2025-09-30 22:28:04.611505', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.688815', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.13404199481010437, 'timestamp': '2025-09-30 22:28:04.696873', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:04.760768', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.11779814958572388, 'timestamp': '2025-09-30 22:28:04.768350', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:04.837773', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.07453593611717224, 'timestamp': '2025-09-30 22:28:04.841913', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:04.903615', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.14048978686332703, 'timestamp': '2025-09-30 22:28:04.909634', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:04.967412', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.12985703349113464, 'timestamp': '2025-09-30 22:28:04.970598', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:05.041030', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.1972113996744156, 'timestamp': '2025-09-30 22:28:05.047871', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.107254', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.16568267345428467, 'timestamp': '2025-09-30 22:28:05.112117', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.170687', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.090477354824543, 'timestamp': '2025-09-30 22:28:05.173518', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:05.234258', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.12324139475822449, 'timestamp': '2025-09-30 22:28:05.239358', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:05.299141', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.11749490350484848, 'timestamp': '2025-09-30 22:28:05.306082', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:05.375150', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.19288675487041473, 'timestamp': '2025-09-30 22:28:05.379930', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:05.452597', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.13466675579547882, 'timestamp': '2025-09-30 22:28:05.456200', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.513753', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.0542001836001873, 'timestamp': '2025-09-30 22:28:05.516662', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:05.584296', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.10885214060544968, 'timestamp': '2025-09-30 22:28:05.591520', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.651723', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.1744871586561203, 'timestamp': '2025-09-30 22:28:05.656692', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.728827', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.11218047142028809, 'timestamp': '2025-09-30 22:28:05.734848', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:05.823977', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.09934293478727341, 'timestamp': '2025-09-30 22:28:05.828288', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:05.887343', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.1505378782749176, 'timestamp': '2025-09-30 22:28:05.894728', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:05.964897', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.1300361305475235, 'timestamp': '2025-09-30 22:28:05.969918', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.041795', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.2107153981924057, 'timestamp': '2025-09-30 22:28:06.045561', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.107916', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.09587418287992477, 'timestamp': '2025-09-30 22:28:06.110860', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:28:06.184134', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.18127675354480743, 'timestamp': '2025-09-30 22:28:06.191013', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:06.248642', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.15957967936992645, 'timestamp': '2025-09-30 22:28:06.252650', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:06.317382', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.23412370681762695, 'timestamp': '2025-09-30 22:28:06.320998', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.389152', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.14380759000778198, 'timestamp': '2025-09-30 22:28:06.399094', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:06.458406', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.17705680429935455, 'timestamp': '2025-09-30 22:28:06.465008', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.539086', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.15863609313964844, 'timestamp': '2025-09-30 22:28:06.541907', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.603954', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.09315159171819687, 'timestamp': '2025-09-30 22:28:06.606250', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.665446', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.11028952896595001, 'timestamp': '2025-09-30 22:28:06.667860', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:06.726522', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.08533943444490433, 'timestamp': '2025-09-30 22:28:06.732787', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.791573', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.12766194343566895, 'timestamp': '2025-09-30 22:28:06.802092', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:06.862997', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.11334937810897827, 'timestamp': '2025-09-30 22:28:06.866375', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:06.924837', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.09105362743139267, 'timestamp': '2025-09-30 22:28:06.928038', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:06.987565', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.12315100431442261, 'timestamp': '2025-09-30 22:28:07.000492', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:07.059663', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.09068305790424347, 'timestamp': '2025-09-30 22:28:07.064010', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.122860', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.09925413131713867, 'timestamp': '2025-09-30 22:28:07.131497', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.189587', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.12461952120065689, 'timestamp': '2025-09-30 22:28:07.193071', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:07.263340', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.219096839427948, 'timestamp': '2025-09-30 22:28:07.269893', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.327152', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.12764830887317657, 'timestamp': '2025-09-30 22:28:07.329676', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.399524', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.11066015809774399, 'timestamp': '2025-09-30 22:28:07.407522', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:07.465419', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.11973702162504196, 'timestamp': '2025-09-30 22:28:07.469329', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:07.533689', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.09209585934877396, 'timestamp': '2025-09-30 22:28:07.540415', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.611404', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.13293321430683136, 'timestamp': '2025-09-30 22:28:07.617490', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:07.688712', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.22164766490459442, 'timestamp': '2025-09-30 22:28:07.692006', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:07.766546', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.13038167357444763, 'timestamp': '2025-09-30 22:28:07.770018', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:07.829141', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.18311139941215515, 'timestamp': '2025-09-30 22:28:07.847148', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:07.909991', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.14493924379348755, 'timestamp': '2025-09-30 22:28:07.914499', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:07.979948', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.16386783123016357, 'timestamp': '2025-09-30 22:28:07.983259', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:08.045622', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.14787238836288452, 'timestamp': '2025-09-30 22:28:08.052313', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.123309', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.12936753034591675, 'timestamp': '2025-09-30 22:28:08.130506', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.188775', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.1706126183271408, 'timestamp': '2025-09-30 22:28:08.191380', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:08.257798', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.1476019024848938, 'timestamp': '2025-09-30 22:28:08.260361', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.318616', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.1273488700389862, 'timestamp': '2025-09-30 22:28:08.321398', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:08.390878', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.07148091495037079, 'timestamp': '2025-09-30 22:28:08.403933', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.462071', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.11701284348964691, 'timestamp': '2025-09-30 22:28:08.465297', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:08.523928', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.1073000580072403, 'timestamp': '2025-09-30 22:28:08.526764', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:08.585523', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.19445832073688507, 'timestamp': '2025-09-30 22:28:08.588844', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.656142', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.1338920146226883, 'timestamp': '2025-09-30 22:28:08.662949', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:08.730937', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.13182243704795837, 'timestamp': '2025-09-30 22:28:08.733595', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:08.806915', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.09610379487276077, 'timestamp': '2025-09-30 22:28:08.809510', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:08.869266', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.11967934668064117, 'timestamp': '2025-09-30 22:28:08.876569', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:08.946558', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.0722043514251709, 'timestamp': '2025-09-30 22:28:08.953225', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:28:09.017613', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.14291536808013916, 'timestamp': '2025-09-30 22:28:09.020913', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:09.088312', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.085629403591156, 'timestamp': '2025-09-30 22:28:09.091984', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:09.153523', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.06465261429548264, 'timestamp': '2025-09-30 22:28:09.157206', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:09.215646', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.16794000566005707, 'timestamp': '2025-09-30 22:28:09.231445', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:09.298326', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.09841180592775345, 'timestamp': '2025-09-30 22:28:09.301651', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:09.361179', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.11741716414690018, 'timestamp': '2025-09-30 22:28:09.364917', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:09.427841', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.08834514021873474, 'timestamp': '2025-09-30 22:28:09.432824', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:09.491650', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.13864029943943024, 'timestamp': '2025-09-30 22:28:09.503946', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:09.574539', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.08905226737260818, 'timestamp': '2025-09-30 22:28:09.577587', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:09.638143', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.1051321029663086, 'timestamp': '2025-09-30 22:28:09.641520', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:09.703793', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.0538722425699234, 'timestamp': '2025-09-30 22:28:09.707479', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:09.767815', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.1631280481815338, 'timestamp': '2025-09-30 22:28:09.774867', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:09.842676', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.19715702533721924, 'timestamp': '2025-09-30 22:28:09.845664', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:09.910100', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.14590007066726685, 'timestamp': '2025-09-30 22:28:09.913501', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:09.979726', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.10869213193655014, 'timestamp': '2025-09-30 22:28:09.982842', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.065264', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.20204107463359833, 'timestamp': '2025-09-30 22:28:10.071903', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:10.142300', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.18391084671020508, 'timestamp': '2025-09-30 22:28:10.146740', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.206625', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.1624356061220169, 'timestamp': '2025-09-30 22:28:10.211897', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.271000', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.14708711206912994, 'timestamp': '2025-09-30 22:28:10.273935', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:10.358322', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.09925413876771927, 'timestamp': '2025-09-30 22:28:10.365786', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.425353', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.09635239839553833, 'timestamp': '2025-09-30 22:28:10.429279', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:10.489193', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.15569451451301575, 'timestamp': '2025-09-30 22:28:10.492172', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:10.550587', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.12557142972946167, 'timestamp': '2025-09-30 22:28:10.553726', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:10.610483', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.12431687116622925, 'timestamp': '2025-09-30 22:28:10.617194', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.675872', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.07817282527685165, 'timestamp': '2025-09-30 22:28:10.681137', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.745826', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.1674729734659195, 'timestamp': '2025-09-30 22:28:10.748819', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.821757', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.12878260016441345, 'timestamp': '2025-09-30 22:28:10.826293', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:10.888152', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.0981677919626236, 'timestamp': '2025-09-30 22:28:10.898930', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:10.960919', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.05919644981622696, 'timestamp': '2025-09-30 22:28:10.964781', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.024002', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.13363578915596008, 'timestamp': '2025-09-30 22:28:11.027198', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:11.085799', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.15570925176143646, 'timestamp': '2025-09-30 22:28:11.089429', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.148147', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.04667709767818451, 'timestamp': '2025-09-30 22:28:11.157233', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:11.216907', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.17804576456546783, 'timestamp': '2025-09-30 22:28:11.221379', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:11.293365', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.21831250190734863, 'timestamp': '2025-09-30 22:28:11.302533', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:11.362525', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.09289279580116272, 'timestamp': '2025-09-30 22:28:11.366481', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.425409', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.037713125348091125, 'timestamp': '2025-09-30 22:28:11.433924', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.494708', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.10188492387533188, 'timestamp': '2025-09-30 22:28:11.498874', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:11.558772', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.08719269931316376, 'timestamp': '2025-09-30 22:28:11.562223', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.625988', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.05689966306090355, 'timestamp': '2025-09-30 22:28:11.629159', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.696484', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.14262960851192474, 'timestamp': '2025-09-30 22:28:11.703879', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:11.777316', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.1535484790802002, 'timestamp': '2025-09-30 22:28:11.779735', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:11.842376', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.185287743806839, 'timestamp': '2025-09-30 22:28:11.845475', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:11.906628', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.04845799133181572, 'timestamp': '2025-09-30 22:28:11.910375', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:11.969205', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.11410611122846603, 'timestamp': '2025-09-30 22:28:11.975266', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.034077', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.08993552625179291, 'timestamp': '2025-09-30 22:28:12.037337', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:12.101338', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.13618113100528717, 'timestamp': '2025-09-30 22:28:12.104270', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.186844', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.1541357785463333, 'timestamp': '2025-09-30 22:28:12.189861', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:12.252143', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.09402856975793839, 'timestamp': '2025-09-30 22:28:12.258952', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:12.317053', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.12788915634155273, 'timestamp': '2025-09-30 22:28:12.320607', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.380240', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.04367620497941971, 'timestamp': '2025-09-30 22:28:12.384327', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:12.445100', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.13180628418922424, 'timestamp': '2025-09-30 22:28:12.448286', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:12.506547', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.1825980842113495, 'timestamp': '2025-09-30 22:28:12.513600', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.571370', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.10279770195484161, 'timestamp': '2025-09-30 22:28:12.575322', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.634614', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.14821238815784454, 'timestamp': '2025-09-30 22:28:12.638025', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.697157', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.140238419175148, 'timestamp': '2025-09-30 22:28:12.699993', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.759343', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.11583735793828964, 'timestamp': '2025-09-30 22:28:12.765425', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:12.823807', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.13750490546226501, 'timestamp': '2025-09-30 22:28:12.829347', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:12.892620', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.15680943429470062, 'timestamp': '2025-09-30 22:28:12.895355', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:12.953083', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.11500362306833267, 'timestamp': '2025-09-30 22:28:12.956037', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:13.019422', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.1554758995771408, 'timestamp': '2025-09-30 22:28:13.025574', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:13.092726', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.11637776345014572, 'timestamp': '2025-09-30 22:28:13.099259', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:13.170969', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.17134220898151398, 'timestamp': '2025-09-30 22:28:13.173847', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:13.240283', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.10969018191099167, 'timestamp': '2025-09-30 22:28:13.242888', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:13.319078', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.1378764659166336, 'timestamp': '2025-09-30 22:28:13.326749', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-09-30 22:28:13.801115', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:13.874123', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.08885394781827927, 'timestamp': '2025-09-30 22:28:13.879594', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:13.938381', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.11405833810567856, 'timestamp': '2025-09-30 22:28:13.943500', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.002453', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.1505814641714096, 'timestamp': '2025-09-30 22:28:14.006845', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:14.071371', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.16073672473430634, 'timestamp': '2025-09-30 22:28:14.083034', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:14.140346', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.22692640125751495, 'timestamp': '2025-09-30 22:28:14.144376', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.219892', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.07571451365947723, 'timestamp': '2025-09-30 22:28:14.223046', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:14.304075', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.19501031935214996, 'timestamp': '2025-09-30 22:28:14.307403', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:14.364994', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.09994372725486755, 'timestamp': '2025-09-30 22:28:14.372053', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:14.437456', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.09750348329544067, 'timestamp': '2025-09-30 22:28:14.440997', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:14.521452', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.05911095812916756, 'timestamp': '2025-09-30 22:28:14.524905', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.601693', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.15921184420585632, 'timestamp': '2025-09-30 22:28:14.605262', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.674706', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.12769414484500885, 'timestamp': '2025-09-30 22:28:14.681884', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.785303', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.15515360236167908, 'timestamp': '2025-09-30 22:28:14.788930', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:14.903303', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.1466865986585617, 'timestamp': '2025-09-30 22:28:14.906794', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:14.998323', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.09297358989715576, 'timestamp': '2025-09-30 22:28:15.001009', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.092502', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.17453432083129883, 'timestamp': '2025-09-30 22:28:15.106291', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.177723', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.07134358584880829, 'timestamp': '2025-09-30 22:28:15.180609', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.280968', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.18606004118919373, 'timestamp': '2025-09-30 22:28:15.284594', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:15.387791', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.12719333171844482, 'timestamp': '2025-09-30 22:28:15.392393', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.508957', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.1845560371875763, 'timestamp': '2025-09-30 22:28:15.517256', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:15.578570', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.030598504468798637, 'timestamp': '2025-09-30 22:28:15.584524', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.660397', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.08376288414001465, 'timestamp': '2025-09-30 22:28:15.664101', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:15.736394', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.22514046728610992, 'timestamp': '2025-09-30 22:28:15.740063', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:15.802572', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.13549311459064484, 'timestamp': '2025-09-30 22:28:15.809610', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:15.877271', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.10727111250162125, 'timestamp': '2025-09-30 22:28:15.880310', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:15.955258', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.07058344781398773, 'timestamp': '2025-09-30 22:28:15.959099', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:16.024540', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.12031528353691101, 'timestamp': '2025-09-30 22:28:16.027915', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:16.113920', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.10687755793333054, 'timestamp': '2025-09-30 22:28:16.120596', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:16.179761', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.2276536375284195, 'timestamp': '2025-09-30 22:28:16.182784', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.242684', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.17000539600849152, 'timestamp': '2025-09-30 22:28:16.245468', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.310834', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.17572130262851715, 'timestamp': '2025-09-30 22:28:16.322322', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.384612', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.1427534818649292, 'timestamp': '2025-09-30 22:28:16.392321', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:16.450704', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.1805621236562729, 'timestamp': '2025-09-30 22:28:16.454494', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.512298', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.1014372706413269, 'timestamp': '2025-09-30 22:28:16.515893', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:16.598255', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.09558548033237457, 'timestamp': '2025-09-30 22:28:16.601727', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.672115', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.05206003412604332, 'timestamp': '2025-09-30 22:28:16.678923', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:16.753467', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.12885430455207825, 'timestamp': '2025-09-30 22:28:16.760794', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:16.819857', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.10992874205112457, 'timestamp': '2025-09-30 22:28:16.823930', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:16.885781', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.09957903623580933, 'timestamp': '2025-09-30 22:28:16.888723', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:16.946708', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.2331186830997467, 'timestamp': '2025-09-30 22:28:16.953509', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:17.021280', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.11142725497484207, 'timestamp': '2025-09-30 22:28:17.026593', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:17.088774', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.1727973222732544, 'timestamp': '2025-09-30 22:28:17.091542', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:17.173097', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.13763442635536194, 'timestamp': '2025-09-30 22:28:17.189856', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:17.261269', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.1272221803665161, 'timestamp': '2025-09-30 22:28:17.267556', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:17.327162', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.04528416693210602, 'timestamp': '2025-09-30 22:28:17.332354', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:17.391391', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.12700094282627106, 'timestamp': '2025-09-30 22:28:17.394760', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:17.467628', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.11300952732563019, 'timestamp': '2025-09-30 22:28:17.471984', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:17.556634', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.18591703474521637, 'timestamp': '2025-09-30 22:28:17.565597', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:17.631424', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.19030633568763733, 'timestamp': '2025-09-30 22:28:17.635278', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:17.708072', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.06856270879507065, 'timestamp': '2025-09-30 22:28:17.719453', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:17.781650', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.16940705478191376, 'timestamp': '2025-09-30 22:28:17.787062', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:17.862994', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.13911008834838867, 'timestamp': '2025-09-30 22:28:17.871123', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:17.929118', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.17647916078567505, 'timestamp': '2025-09-30 22:28:17.936094', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:18.009213', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.07781679928302765, 'timestamp': '2025-09-30 22:28:18.012056', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:18.082986', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.11920133233070374, 'timestamp': '2025-09-30 22:28:18.086085', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:18.161639', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.12214160710573196, 'timestamp': '2025-09-30 22:28:18.171092', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:18.241443', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.08381553739309311, 'timestamp': '2025-09-30 22:28:18.244742', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:18.328301', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.14666925370693207, 'timestamp': '2025-09-30 22:28:18.348025', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:18.406384', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.10002248734235764, 'timestamp': '2025-09-30 22:28:18.412984', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:18.473477', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.15622322261333466, 'timestamp': '2025-09-30 22:28:18.481756', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:18.545564', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.17416362464427948, 'timestamp': '2025-09-30 22:28:18.549565', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:18.609082', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.08591438829898834, 'timestamp': '2025-09-30 22:28:18.615776', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:18.692304', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.23501801490783691, 'timestamp': '2025-09-30 22:28:18.695163', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:18.770102', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.08149883151054382, 'timestamp': '2025-09-30 22:28:18.784340', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:18.852707', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.16214554011821747, 'timestamp': '2025-09-30 22:28:18.856776', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:18.929216', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.1517936736345291, 'timestamp': '2025-09-30 22:28:18.934553', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:18.992673', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.08410383760929108, 'timestamp': '2025-09-30 22:28:19.001403', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:19.093806', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.10274097323417664, 'timestamp': '2025-09-30 22:28:19.105992', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:19.177485', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.07148342579603195, 'timestamp': '2025-09-30 22:28:19.180236', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:19.244117', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.10216595232486725, 'timestamp': '2025-09-30 22:28:19.257670', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:19.325431', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.12233875691890717, 'timestamp': '2025-09-30 22:28:19.328219', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:19.401802', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.17078010737895966, 'timestamp': '2025-09-30 22:28:19.423802', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:19.488948', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.12402395159006119, 'timestamp': '2025-09-30 22:28:19.498922', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:19.567356', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.1593455821275711, 'timestamp': '2025-09-30 22:28:19.575682', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:19.637604', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.1231391429901123, 'timestamp': '2025-09-30 22:28:19.641570', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:19.704695', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.20933891832828522, 'timestamp': '2025-09-30 22:28:19.711880', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:19.780850', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.14966699481010437, 'timestamp': '2025-09-30 22:28:19.785195', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:19.847092', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.06398090720176697, 'timestamp': '2025-09-30 22:28:19.853144', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:19.917284', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.1324346959590912, 'timestamp': '2025-09-30 22:28:19.927073', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:19.987939', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.056055884808301926, 'timestamp': '2025-09-30 22:28:19.999422', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:20.061299', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.10595495998859406, 'timestamp': '2025-09-30 22:28:20.078931', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:20.139171', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.13587696850299835, 'timestamp': '2025-09-30 22:28:20.145822', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:20.210645', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.09612102061510086, 'timestamp': '2025-09-30 22:28:20.214265', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:20.287614', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.08196265250444412, 'timestamp': '2025-09-30 22:28:20.294669', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:20.357905', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.10898169130086899, 'timestamp': '2025-09-30 22:28:20.362748', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:20.432063', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.13783787190914154, 'timestamp': '2025-09-30 22:28:20.435636', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:20.495441', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.1064528152346611, 'timestamp': '2025-09-30 22:28:20.503566', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:20.574474', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.07931812852621078, 'timestamp': '2025-09-30 22:28:20.582480', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:20.643284', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.10301805287599564, 'timestamp': '2025-09-30 22:28:20.647750', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:20.707869', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.16144660115242004, 'timestamp': '2025-09-30 22:28:20.712318', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:20.784509', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.11685159802436829, 'timestamp': '2025-09-30 22:28:20.790192', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:20.849178', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.1396411657333374, 'timestamp': '2025-09-30 22:28:20.857217', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:20.923315', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.16635675728321075, 'timestamp': '2025-09-30 22:28:20.928026', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:20.988960', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.0882619321346283, 'timestamp': '2025-09-30 22:28:20.993628', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:21.055149', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.13039126992225647, 'timestamp': '2025-09-30 22:28:21.059206', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:21.120329', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.15515904128551483, 'timestamp': '2025-09-30 22:28:21.127816', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:21.186940', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.1109948605298996, 'timestamp': '2025-09-30 22:28:21.191376', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:21.251933', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.07302374392747879, 'timestamp': '2025-09-30 22:28:21.255094', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:21.314617', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.08462497591972351, 'timestamp': '2025-09-30 22:28:21.319201', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:21.393185', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.18874692916870117, 'timestamp': '2025-09-30 22:28:21.404398', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:28:21.479788', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.18539944291114807, 'timestamp': '2025-09-30 22:28:21.484224', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:21.547193', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.07401065528392792, 'timestamp': '2025-09-30 22:28:21.552565', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:21.632510', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.28120043873786926, 'timestamp': '2025-09-30 22:28:21.636856', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:21.702587', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.10114462673664093, 'timestamp': '2025-09-30 22:28:21.711901', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:21.783636', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.12566007673740387, 'timestamp': '2025-09-30 22:28:21.788635', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:21.875499', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.1813972443342209, 'timestamp': '2025-09-30 22:28:21.878962', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:21.937850', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.1386396288871765, 'timestamp': '2025-09-30 22:28:21.941081', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.000590', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.18360938131809235, 'timestamp': '2025-09-30 22:28:22.015573', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.085226', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.15101946890354156, 'timestamp': '2025-09-30 22:28:22.091326', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.152988', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.1498226821422577, 'timestamp': '2025-09-30 22:28:22.156783', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.221839', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.2043335884809494, 'timestamp': '2025-09-30 22:28:22.226757', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.285934', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.10411883890628815, 'timestamp': '2025-09-30 22:28:22.307221', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.367175', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.10241792351007462, 'timestamp': '2025-09-30 22:28:22.371007', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.442635', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.11666105687618256, 'timestamp': '2025-09-30 22:28:22.446144', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.515530', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.1612369567155838, 'timestamp': '2025-09-30 22:28:22.519143', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.584569', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.16993749141693115, 'timestamp': '2025-09-30 22:28:22.592286', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:22.651625', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.17522095143795013, 'timestamp': '2025-09-30 22:28:22.655878', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.715378', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.11775445193052292, 'timestamp': '2025-09-30 22:28:22.727762', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:22.795737', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.08311619609594345, 'timestamp': '2025-09-30 22:28:22.799387', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.858257', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.1377759426832199, 'timestamp': '2025-09-30 22:28:22.868454', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:22.926137', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.15467342734336853, 'timestamp': '2025-09-30 22:28:22.934898', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:22.996029', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.11141902208328247, 'timestamp': '2025-09-30 22:28:22.999992', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:23.073140', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.17566893994808197, 'timestamp': '2025-09-30 22:28:23.076787', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:23.138019', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.17800885438919067, 'timestamp': '2025-09-30 22:28:23.152541', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:23.216764', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.23195436596870422, 'timestamp': '2025-09-30 22:28:23.231296', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:23.291210', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.14898283779621124, 'timestamp': '2025-09-30 22:28:23.294286', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:23.359726', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.1899927258491516, 'timestamp': '2025-09-30 22:28:23.367138', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:23.428957', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.22370007634162903, 'timestamp': '2025-09-30 22:28:23.444162', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:23.502792', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.07234429568052292, 'timestamp': '2025-09-30 22:28:23.506856', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:23.581960', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.11339935660362244, 'timestamp': '2025-09-30 22:28:23.591007', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:23.651676', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.12314165383577347, 'timestamp': '2025-09-30 22:28:23.656265', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:23.716807', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.16920258104801178, 'timestamp': '2025-09-30 22:28:23.725616', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:23.790353', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.16117195785045624, 'timestamp': '2025-09-30 22:28:23.795996', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:23.865268', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.14422105252742767, 'timestamp': '2025-09-30 22:28:23.869463', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:23.942767', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.14266827702522278, 'timestamp': '2025-09-30 22:28:23.949356', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.008473', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.16065581142902374, 'timestamp': '2025-09-30 22:28:24.017781', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.079073', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.15255065262317657, 'timestamp': '2025-09-30 22:28:24.084913', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.153842', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.16485217213630676, 'timestamp': '2025-09-30 22:28:24.158222', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.227628', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.13733644783496857, 'timestamp': '2025-09-30 22:28:24.231189', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.290519', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.09706153720617294, 'timestamp': '2025-09-30 22:28:24.297455', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:24.357709', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.1084994524717331, 'timestamp': '2025-09-30 22:28:24.360929', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.427263', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.08677889406681061, 'timestamp': '2025-09-30 22:28:24.431468', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.494424', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.09202388674020767, 'timestamp': '2025-09-30 22:28:24.497979', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.557369', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.11823248863220215, 'timestamp': '2025-09-30 22:28:24.565768', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:24.633200', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.2501397132873535, 'timestamp': '2025-09-30 22:28:24.637930', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.695516', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.16198162734508514, 'timestamp': '2025-09-30 22:28:24.698483', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:24.784435', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.10970886051654816, 'timestamp': '2025-09-30 22:28:24.792857', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:24.852287', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.1785246878862381, 'timestamp': '2025-09-30 22:28:24.860350', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.919748', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.12803101539611816, 'timestamp': '2025-09-30 22:28:24.923358', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:24.984955', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.09984487295150757, 'timestamp': '2025-09-30 22:28:24.989645', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:25.063984', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.08897164463996887, 'timestamp': '2025-09-30 22:28:25.067185', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:25.137648', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.08799577504396439, 'timestamp': '2025-09-30 22:28:25.144462', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:25.203426', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.09296993911266327, 'timestamp': '2025-09-30 22:28:25.207538', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:25.282434', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.14611072838306427, 'timestamp': '2025-09-30 22:28:25.287732', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.348092', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.07769133895635605, 'timestamp': '2025-09-30 22:28:25.353156', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.413228', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.08292770385742188, 'timestamp': '2025-09-30 22:28:25.420773', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.489122', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.055832818150520325, 'timestamp': '2025-09-30 22:28:25.494348', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:25.556485', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.06996631622314453, 'timestamp': '2025-09-30 22:28:25.560867', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.632058', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.15225008130073547, 'timestamp': '2025-09-30 22:28:25.637377', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:25.700565', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.09830235689878464, 'timestamp': '2025-09-30 22:28:25.709673', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.777664', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.16030634939670563, 'timestamp': '2025-09-30 22:28:25.784303', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:25.842202', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.182448148727417, 'timestamp': '2025-09-30 22:28:25.846257', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:25.917662', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.18097423017024994, 'timestamp': '2025-09-30 22:28:25.923339', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:25.991981', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.17022620141506195, 'timestamp': '2025-09-30 22:28:26.000224', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:26.061651', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.11773692071437836, 'timestamp': '2025-09-30 22:28:26.065308', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.125721', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.104569211602211, 'timestamp': '2025-09-30 22:28:26.129210', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:26.190431', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.11001525074243546, 'timestamp': '2025-09-30 22:28:26.194809', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:26.253586', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.08967724442481995, 'timestamp': '2025-09-30 22:28:26.261518', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:26.324458', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.06913680583238602, 'timestamp': '2025-09-30 22:28:26.328524', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.388245', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.15808835625648499, 'timestamp': '2025-09-30 22:28:26.392111', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:26.450241', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.14213840663433075, 'timestamp': '2025-09-30 22:28:26.454913', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:26.513281', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.10967611521482468, 'timestamp': '2025-09-30 22:28:26.521852', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.586876', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.1305740624666214, 'timestamp': '2025-09-30 22:28:26.591452', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.651697', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.13123305141925812, 'timestamp': '2025-09-30 22:28:26.660854', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:26.730311', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.15051811933517456, 'timestamp': '2025-09-30 22:28:26.733790', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.793832', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.11171436309814453, 'timestamp': '2025-09-30 22:28:26.803880', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:26.869123', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.20318154990673065, 'timestamp': '2025-09-30 22:28:26.873294', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:26.935856', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.13085326552391052, 'timestamp': '2025-09-30 22:28:26.940207', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:27.004169', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.1474069207906723, 'timestamp': '2025-09-30 22:28:27.009548', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:27.084639', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.11632287502288818, 'timestamp': '2025-09-30 22:28:27.093056', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:27.166804', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.17085246741771698, 'timestamp': '2025-09-30 22:28:27.178945', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:27.242412', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.12970027327537537, 'timestamp': '2025-09-30 22:28:27.246277', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:27.306979', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.1930035650730133, 'timestamp': '2025-09-30 22:28:27.325281', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:28:27.385420', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.14093270897865295, 'timestamp': '2025-09-30 22:28:27.393801', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:27.452357', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.11180613934993744, 'timestamp': '2025-09-30 22:28:27.460753', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:27.523442', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.09753149002790451, 'timestamp': '2025-09-30 22:28:27.526533', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:27.615124', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.17302465438842773, 'timestamp': '2025-09-30 22:28:27.631562', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:27.711909', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.2369646579027176, 'timestamp': '2025-09-30 22:28:27.719415', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:27.778415', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.07820604741573334, 'timestamp': '2025-09-30 22:28:27.781782', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:27.870400', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.14719773828983307, 'timestamp': '2025-09-30 22:28:27.876057', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:27.943077', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.11610263586044312, 'timestamp': '2025-09-30 22:28:27.947568', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:28.018713', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.1380624920129776, 'timestamp': '2025-09-30 22:28:28.027230', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:28.088424', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.06679686158895493, 'timestamp': '2025-09-30 22:28:28.092847', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:28.156835', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.1322583705186844, 'timestamp': '2025-09-30 22:28:28.162012', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:28.223350', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.2127620279788971, 'timestamp': '2025-09-30 22:28:28.237567', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:28.318966', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.10285425186157227, 'timestamp': '2025-09-30 22:28:28.325625', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:28.404782', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.15647852420806885, 'timestamp': '2025-09-30 22:28:28.408427', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:28.482408', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.1643647402524948, 'timestamp': '2025-09-30 22:28:28.487920', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:28.548260', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.10654404014348984, 'timestamp': '2025-09-30 22:28:28.562175', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:28.622259', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.12975314259529114, 'timestamp': '2025-09-30 22:28:28.630896', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:28.690055', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.1310059130191803, 'timestamp': '2025-09-30 22:28:28.693430', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:28.754218', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.2011125087738037, 'timestamp': '2025-09-30 22:28:28.769493', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:28.837752', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.10938310623168945, 'timestamp': '2025-09-30 22:28:28.842021', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:28.908226', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.1896848827600479, 'timestamp': '2025-09-30 22:28:28.917110', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:28.979579', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.06934035569429398, 'timestamp': '2025-09-30 22:28:28.984181', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:29.046720', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.2058950960636139, 'timestamp': '2025-09-30 22:28:29.051351', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:29.120327', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.0491696298122406, 'timestamp': '2025-09-30 22:28:29.133198', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:29.199544', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.19548165798187256, 'timestamp': '2025-09-30 22:28:29.207293', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:29.267580', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.1991022825241089, 'timestamp': '2025-09-30 22:28:29.271596', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:29.329882', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.11223677545785904, 'timestamp': '2025-09-30 22:28:29.333084', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:29.393278', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.12383788824081421, 'timestamp': '2025-09-30 22:28:29.398394', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:29.456703', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.09181548655033112, 'timestamp': '2025-09-30 22:28:29.464256', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:29.531428', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.09338117390871048, 'timestamp': '2025-09-30 22:28:29.536655', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:29.604393', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.08211147040128708, 'timestamp': '2025-09-30 22:28:29.608410', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:29.667466', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.12492800503969193, 'timestamp': '2025-09-30 22:28:29.671280', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:29.731590', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07072276622056961, 'timestamp': '2025-09-30 22:28:29.740042', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:29.796930', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.11830529570579529, 'timestamp': '2025-09-30 22:28:29.805145', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:29.879668', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.11323845386505127, 'timestamp': '2025-09-30 22:28:29.886437', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:28:45.161068', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 12790.793757392601, 'timestamp': '2025-09-30 22:28:45.182859', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:45.242544', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.18201160430908203, 'timestamp': '2025-09-30 22:28:45.251252', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:45.331582', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.11717487126588821, 'timestamp': '2025-09-30 22:28:45.339089', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:45.406874', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.1255587339401245, 'timestamp': '2025-09-30 22:28:45.424577', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:45.503841', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.058689530938863754, 'timestamp': '2025-09-30 22:28:45.508763', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:45.574287', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.1399042159318924, 'timestamp': '2025-09-30 22:28:45.579639', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:45.649827', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.12408756464719772, 'timestamp': '2025-09-30 22:28:45.657816', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:45.725856', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.11088277399539948, 'timestamp': '2025-09-30 22:28:45.743198', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:45.811238', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.22636505961418152, 'timestamp': '2025-09-30 22:28:45.831371', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:45.890299', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.20089976489543915, 'timestamp': '2025-09-30 22:28:45.894628', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:45.967257', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.1031092032790184, 'timestamp': '2025-09-30 22:28:45.979751', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:46.046914', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.1494712084531784, 'timestamp': '2025-09-30 22:28:46.051958', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:46.124813', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.12377790361642838, 'timestamp': '2025-09-30 22:28:46.132360', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:46.204453', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.11202798783779144, 'timestamp': '2025-09-30 22:28:46.208360', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:46.266593', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.03978210315108299, 'timestamp': '2025-09-30 22:28:46.279639', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:46.348416', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.14269433915615082, 'timestamp': '2025-09-30 22:28:46.351511', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:46.428381', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.12867681682109833, 'timestamp': '2025-09-30 22:28:46.433736', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:46.505658', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.052118632942438126, 'timestamp': '2025-09-30 22:28:46.509543', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:46.580210', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.17025534808635712, 'timestamp': '2025-09-30 22:28:46.587247', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:46.648418', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.13189926743507385, 'timestamp': '2025-09-30 22:28:46.656546', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:46.716665', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.0954798087477684, 'timestamp': '2025-09-30 22:28:46.721905', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:46.792277', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.18598416447639465, 'timestamp': '2025-09-30 22:28:46.808213', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:46.868551', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.11067338287830353, 'timestamp': '2025-09-30 22:28:46.879085', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:46.945733', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.08054393529891968, 'timestamp': '2025-09-30 22:28:46.953111', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:47.012460', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.04760124906897545, 'timestamp': '2025-09-30 22:28:47.019288', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:47.083062', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.16079753637313843, 'timestamp': '2025-09-30 22:28:47.088974', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:47.149729', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.13301144540309906, 'timestamp': '2025-09-30 22:28:47.158402', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:47.216184', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.21031875908374786, 'timestamp': '2025-09-30 22:28:47.220946', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:47.279138', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.11096164584159851, 'timestamp': '2025-09-30 22:28:47.285105', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:47.357634', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.11527573317289352, 'timestamp': '2025-09-30 22:28:47.364523', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:47.427068', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.16701442003250122, 'timestamp': '2025-09-30 22:28:47.434866', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:47.494751', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.11740023642778397, 'timestamp': '2025-09-30 22:28:47.498128', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:47.600753', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.2601020336151123, 'timestamp': '2025-09-30 22:28:47.605109', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:47.677428', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.15161459147930145, 'timestamp': '2025-09-30 22:28:47.682767', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:47.744076', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.024035487323999405, 'timestamp': '2025-09-30 22:28:47.763141', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:47.822224', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.21084162592887878, 'timestamp': '2025-09-30 22:28:47.826415', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:47.887418', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.18705090880393982, 'timestamp': '2025-09-30 22:28:47.905449', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:47.973804', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.09559793025255203, 'timestamp': '2025-09-30 22:28:47.997079', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:48.057291', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.10195188969373703, 'timestamp': '2025-09-30 22:28:48.065538', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:48.125927', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.11446984857320786, 'timestamp': '2025-09-30 22:28:48.133094', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:48.192458', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.10602681338787079, 'timestamp': '2025-09-30 22:28:48.195472', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.262595', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.09809298813343048, 'timestamp': '2025-09-30 22:28:48.268595', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:48.328902', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.13531291484832764, 'timestamp': '2025-09-30 22:28:48.339869', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.401343', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.09061209857463837, 'timestamp': '2025-09-30 22:28:48.406587', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.486846', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.21725691854953766, 'timestamp': '2025-09-30 22:28:48.491070', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.562685', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.1279924213886261, 'timestamp': '2025-09-30 22:28:48.568370', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.631263', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.09380511194467545, 'timestamp': '2025-09-30 22:28:48.639225', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:48.703527', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.2503199279308319, 'timestamp': '2025-09-30 22:28:48.708267', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:48.772803', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.08756227791309357, 'timestamp': '2025-09-30 22:28:48.778546', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:48.837993', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.12209483236074448, 'timestamp': '2025-09-30 22:28:48.843267', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:48.901338', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.1339428424835205, 'timestamp': '2025-09-30 22:28:48.909204', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:48.966768', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.1366426944732666, 'timestamp': '2025-09-30 22:28:48.971194', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:49.038902', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.1309032440185547, 'timestamp': '2025-09-30 22:28:49.043037', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:49.116315', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.16832929849624634, 'timestamp': '2025-09-30 22:28:49.122028', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:49.181943', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.14968343079090118, 'timestamp': '2025-09-30 22:28:49.190097', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:49.256816', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.12120860815048218, 'timestamp': '2025-09-30 22:28:49.260749', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:49.320252', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.11900655180215836, 'timestamp': '2025-09-30 22:28:49.324971', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:49.393924', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.12614476680755615, 'timestamp': '2025-09-30 22:28:49.402716', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:49.484108', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.1045505702495575, 'timestamp': '2025-09-30 22:28:49.497458', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:49.572859', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.09260471910238266, 'timestamp': '2025-09-30 22:28:49.576497', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:49.656500', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.16022677719593048, 'timestamp': '2025-09-30 22:28:49.677196', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:49.756840', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.15123477578163147, 'timestamp': '2025-09-30 22:28:49.761794', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:49.821161', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.08163614571094513, 'timestamp': '2025-09-30 22:28:49.828831', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:49.897939', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.1487964391708374, 'timestamp': '2025-09-30 22:28:49.902853', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:49.961959', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.06364543735980988, 'timestamp': '2025-09-30 22:28:49.968998', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:50.031166', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.06242668628692627, 'timestamp': '2025-09-30 22:28:50.034577', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:50.099167', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.09872392565011978, 'timestamp': '2025-09-30 22:28:50.107658', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:50.166105', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.21929173171520233, 'timestamp': '2025-09-30 22:28:50.171339', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:50.230248', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.0978580191731453, 'timestamp': '2025-09-30 22:28:50.235173', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:50.297223', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.16374057531356812, 'timestamp': '2025-09-30 22:28:50.305935', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:28:50.364777', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.15231366455554962, 'timestamp': '2025-09-30 22:28:50.373608', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:50.433317', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.1991969645023346, 'timestamp': '2025-09-30 22:28:50.445067', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:50.503129', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.1136496514081955, 'timestamp': '2025-09-30 22:28:50.507998', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:50.570866', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.06397902220487595, 'timestamp': '2025-09-30 22:28:50.579868', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:50.650890', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.15026341378688812, 'timestamp': '2025-09-30 22:28:50.660744', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:50.719055', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.07305961847305298, 'timestamp': '2025-09-30 22:28:50.723460', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:50.782206', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.15602831542491913, 'timestamp': '2025-09-30 22:28:50.806346', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:50.865193', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.07877937704324722, 'timestamp': '2025-09-30 22:28:50.869074', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:50.929295', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.14271196722984314, 'timestamp': '2025-09-30 22:28:50.937268', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.000651', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.08738763630390167, 'timestamp': '2025-09-30 22:28:51.016478', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.075145', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.08965469896793365, 'timestamp': '2025-09-30 22:28:51.079166', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:51.143551', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.2049376666545868, 'timestamp': '2025-09-30 22:28:51.161181', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:28:51.237370', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.11654651165008545, 'timestamp': '2025-09-30 22:28:51.250357', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.313473', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.18253222107887268, 'timestamp': '2025-09-30 22:28:51.322222', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:51.381874', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.24366338551044464, 'timestamp': '2025-09-30 22:28:51.402362', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.461547', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.1530255526304245, 'timestamp': '2025-09-30 22:28:51.482919', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.545464', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.17623679339885712, 'timestamp': '2025-09-30 22:28:51.553473', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:51.610844', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.11662128567695618, 'timestamp': '2025-09-30 22:28:51.614744', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:51.674915', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.13609598577022552, 'timestamp': '2025-09-30 22:28:51.678594', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:51.738518', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.12405019253492355, 'timestamp': '2025-09-30 22:28:51.752830', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.823887', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.1027250662446022, 'timestamp': '2025-09-30 22:28:51.836681', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:51.920473', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.20169693231582642, 'timestamp': '2025-09-30 22:28:51.926563', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:51.994456', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.100748710334301, 'timestamp': '2025-09-30 22:28:52.000568', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:52.059808', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.11340563744306564, 'timestamp': '2025-09-30 22:28:52.065459', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:52.124945', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.10237406939268112, 'timestamp': '2025-09-30 22:28:52.132008', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:52.190236', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.1082601472735405, 'timestamp': '2025-09-30 22:28:52.194782', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:52.256481', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.12740877270698547, 'timestamp': '2025-09-30 22:28:52.262460', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:52.330011', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.21115875244140625, 'timestamp': '2025-09-30 22:28:52.335282', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:52.394815', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.13603007793426514, 'timestamp': '2025-09-30 22:28:52.409532', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:52.468398', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.12135343998670578, 'timestamp': '2025-09-30 22:28:52.472619', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:52.531204', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.09216229617595673, 'timestamp': '2025-09-30 22:28:52.549891', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:52.631380', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.0757431834936142, 'timestamp': '2025-09-30 22:28:52.650850', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:52.746824', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.16181452572345734, 'timestamp': '2025-09-30 22:28:52.778580', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:52.853737', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.15344679355621338, 'timestamp': '2025-09-30 22:28:52.870350', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:52.946206', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.12244513630867004, 'timestamp': '2025-09-30 22:28:52.973655', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:53.037720', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.06295463442802429, 'timestamp': '2025-09-30 22:28:53.087088', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:53.171311', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.0878237783908844, 'timestamp': '2025-09-30 22:28:53.188978', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:53.268658', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.22079411149024963, 'timestamp': '2025-09-30 22:28:53.276462', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:53.354471', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.06567033380270004, 'timestamp': '2025-09-30 22:28:53.368954', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:53.447608', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.15365085005760193, 'timestamp': '2025-09-30 22:28:53.480097', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:28:53.565834', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.12375498563051224, 'timestamp': '2025-09-30 22:28:53.589100', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:53.661700', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.15683726966381073, 'timestamp': '2025-09-30 22:28:53.708748', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:53.785658', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.0997554212808609, 'timestamp': '2025-09-30 22:28:53.795388', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:53.885562', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.18137361109256744, 'timestamp': '2025-09-30 22:28:53.902832', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:53.980384', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.1033332571387291, 'timestamp': '2025-09-30 22:28:53.997197', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:54.083887', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.08958779275417328, 'timestamp': '2025-09-30 22:28:54.100777', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:54.177301', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.1332634538412094, 'timestamp': '2025-09-30 22:28:54.195913', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:54.268026', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.10579487681388855, 'timestamp': '2025-09-30 22:28:54.279554', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:54.377141', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.19774626195430756, 'timestamp': '2025-09-30 22:28:54.386561', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:54.446514', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.10114198923110962, 'timestamp': '2025-09-30 22:28:54.451525', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:54.512538', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.1322540044784546, 'timestamp': '2025-09-30 22:28:54.518414', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:54.579626', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.10780265182256699, 'timestamp': '2025-09-30 22:28:54.595415', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:54.653852', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.17050223052501678, 'timestamp': '2025-09-30 22:28:54.662333', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:54.721662', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.12732163071632385, 'timestamp': '2025-09-30 22:28:54.727448', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:54.808104', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.17677265405654907, 'timestamp': '2025-09-30 22:28:54.819990', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:54.879375', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.17050127685070038, 'timestamp': '2025-09-30 22:28:54.892351', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:54.953356', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.0793050229549408, 'timestamp': '2025-09-30 22:28:54.962137', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.020294', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.1431802660226822, 'timestamp': '2025-09-30 22:28:55.026206', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:55.086902', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.1389637291431427, 'timestamp': '2025-09-30 22:28:55.092976', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:55.172741', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.10734627395868301, 'timestamp': '2025-09-30 22:28:55.177532', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.253987', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.07519736140966415, 'timestamp': '2025-09-30 22:28:55.260551', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.333563', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.11312249302864075, 'timestamp': '2025-09-30 22:28:55.339881', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.409873', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.20654799044132233, 'timestamp': '2025-09-30 22:28:55.416594', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.500063', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.1482999324798584, 'timestamp': '2025-09-30 22:28:55.508516', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:55.588624', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.15427899360656738, 'timestamp': '2025-09-30 22:28:55.607480', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:55.695630', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.11238903552293777, 'timestamp': '2025-09-30 22:28:55.700251', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:55.793849', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.09871339052915573, 'timestamp': '2025-09-30 22:28:55.804994', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:55.880433', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.060792356729507446, 'timestamp': '2025-09-30 22:28:55.897231', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:55.979994', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.1271372139453888, 'timestamp': '2025-09-30 22:28:55.988510', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:56.060653', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.14604869484901428, 'timestamp': '2025-09-30 22:28:56.075191', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:56.160960', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.1477992832660675, 'timestamp': '2025-09-30 22:28:56.165497', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:56.241079', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.17210552096366882, 'timestamp': '2025-09-30 22:28:56.249761', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:56.313754', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.0923067256808281, 'timestamp': '2025-09-30 22:28:56.320881', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:56.394395', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.13525646924972534, 'timestamp': '2025-09-30 22:28:56.398674', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:56.503721', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.11478015780448914, 'timestamp': '2025-09-30 22:28:56.508476', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:56.580413', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.2504972815513611, 'timestamp': '2025-09-30 22:28:56.584401', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:56.664929', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.15676367282867432, 'timestamp': '2025-09-30 22:28:56.674395', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:56.747103', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.12658166885375977, 'timestamp': '2025-09-30 22:28:56.754444', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:56.827117', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.09306355565786362, 'timestamp': '2025-09-30 22:28:56.831357', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:28:56.907127', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.1295861303806305, 'timestamp': '2025-09-30 22:28:56.910769', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:56.981356', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.16950221359729767, 'timestamp': '2025-09-30 22:28:56.990945', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.062402', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.09766039252281189, 'timestamp': '2025-09-30 22:28:57.066835', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:57.126055', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.08956961333751678, 'timestamp': '2025-09-30 22:28:57.130987', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:57.190633', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.05138838291168213, 'timestamp': '2025-09-30 22:28:57.196262', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:57.256389', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.18998543918132782, 'timestamp': '2025-09-30 22:28:57.263855', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:57.327693', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.1532423049211502, 'timestamp': '2025-09-30 22:28:57.332627', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.399726', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.11604950577020645, 'timestamp': '2025-09-30 22:28:57.404076', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:57.464154', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.1294519156217575, 'timestamp': '2025-09-30 22:28:57.468905', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.528200', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.12190407514572144, 'timestamp': '2025-09-30 22:28:57.550834', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.626355', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.07506325840950012, 'timestamp': '2025-09-30 22:28:57.631933', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:57.691175', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.10327254235744476, 'timestamp': '2025-09-30 22:28:57.695623', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:57.756639', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.12133818119764328, 'timestamp': '2025-09-30 22:28:57.760240', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:57.819456', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.057641442865133286, 'timestamp': '2025-09-30 22:28:57.838120', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.905300', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.1462838351726532, 'timestamp': '2025-09-30 22:28:57.912310', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:57.973595', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.08296997845172882, 'timestamp': '2025-09-30 22:28:57.978893', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:58.052215', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.14574632048606873, 'timestamp': '2025-09-30 22:28:58.058188', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:58.130703', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.10403537005186081, 'timestamp': '2025-09-30 22:28:58.138981', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.205047', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.05982545390725136, 'timestamp': '2025-09-30 22:28:58.209483', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.267850', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.07792716473340988, 'timestamp': '2025-09-30 22:28:58.272679', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:58.333420', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.23101359605789185, 'timestamp': '2025-09-30 22:28:58.338904', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:58.399335', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.2354021817445755, 'timestamp': '2025-09-30 22:28:58.405893', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:58.475553', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.1421889066696167, 'timestamp': '2025-09-30 22:28:58.480092', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:58.547046', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.18708185851573944, 'timestamp': '2025-09-30 22:28:58.550792', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.617615', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.12665405869483948, 'timestamp': '2025-09-30 22:28:58.621770', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.688519', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.1041986346244812, 'timestamp': '2025-09-30 22:28:58.695731', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.753705', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.0663459450006485, 'timestamp': '2025-09-30 22:28:58.758250', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.818290', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.14583735167980194, 'timestamp': '2025-09-30 22:28:58.821297', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.888356', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.12686531245708466, 'timestamp': '2025-09-30 22:28:58.892076', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:58.949782', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.24955441057682037, 'timestamp': '2025-09-30 22:28:58.958111', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:59.020343', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.1231069266796112, 'timestamp': '2025-09-30 22:28:59.023660', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:59.094154', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.18990473449230194, 'timestamp': '2025-09-30 22:28:59.098339', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:59.158059', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.06234070658683777, 'timestamp': '2025-09-30 22:28:59.163010', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:59.230539', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.0935196578502655, 'timestamp': '2025-09-30 22:28:59.238498', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:59.314166', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.11004376411437988, 'timestamp': '2025-09-30 22:28:59.318532', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:28:59.386757', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.16807745397090912, 'timestamp': '2025-09-30 22:28:59.393632', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:59.456507', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.12657004594802856, 'timestamp': '2025-09-30 22:28:59.461650', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:28:59.519114', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.09244327992200851, 'timestamp': '2025-09-30 22:28:59.526861', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:59.594767', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.15204764902591705, 'timestamp': '2025-09-30 22:28:59.604531', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:28:59.664215', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.09284327924251556, 'timestamp': '2025-09-30 22:28:59.675654', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:59.735375', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.1077553853392601, 'timestamp': '2025-09-30 22:28:59.739229', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:28:59.799536', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.18589311838150024, 'timestamp': '2025-09-30 22:28:59.806615', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:59.863480', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.13234323263168335, 'timestamp': '2025-09-30 22:28:59.866380', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:28:59.926961', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.11165668815374374, 'timestamp': '2025-09-30 22:28:59.930192', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:00.008176', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.20833759009838104, 'timestamp': '2025-09-30 22:29:00.013105', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:00.075378', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.1593358963727951, 'timestamp': '2025-09-30 22:29:00.085413', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:00.149316', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.10588789731264114, 'timestamp': '2025-09-30 22:29:00.153769', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:00.212511', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.15620043873786926, 'timestamp': '2025-09-30 22:29:00.217380', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.276624', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.0914200097322464, 'timestamp': '2025-09-30 22:29:00.282175', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:00.341526', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.151947021484375, 'timestamp': '2025-09-30 22:29:00.361036', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.432807', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.04613697901368141, 'timestamp': '2025-09-30 22:29:00.437477', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.497292', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.08532499521970749, 'timestamp': '2025-09-30 22:29:00.501302', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.558663', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.1723775714635849, 'timestamp': '2025-09-30 22:29:00.574374', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:00.632751', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.09871742129325867, 'timestamp': '2025-09-30 22:29:00.640779', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:00.703258', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.10082198679447174, 'timestamp': '2025-09-30 22:29:00.707462', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:00.765986', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.062095168977975845, 'timestamp': '2025-09-30 22:29:00.770991', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.845660', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.11847378313541412, 'timestamp': '2025-09-30 22:29:00.849409', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.907215', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.11131492257118225, 'timestamp': '2025-09-30 22:29:00.915291', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:00.973053', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.14804081618785858, 'timestamp': '2025-09-30 22:29:00.977463', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:01.038550', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.08399779349565506, 'timestamp': '2025-09-30 22:29:01.041748', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:01.108425', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.16844724118709564, 'timestamp': '2025-09-30 22:29:01.112530', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:01.170947', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.06941001862287521, 'timestamp': '2025-09-30 22:29:01.195014', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:01.254861', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.14392930269241333, 'timestamp': '2025-09-30 22:29:01.258876', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:01.318126', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.13236768543720245, 'timestamp': '2025-09-30 22:29:01.323209', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:01.382893', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.20806002616882324, 'timestamp': '2025-09-30 22:29:01.397315', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:01.454587', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.09216777235269547, 'timestamp': '2025-09-30 22:29:01.460714', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:01.522464', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.11069824546575546, 'timestamp': '2025-09-30 22:29:01.526404', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:01.592568', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.1556670218706131, 'timestamp': '2025-09-30 22:29:01.596536', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:01.653874', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.12325844913721085, 'timestamp': '2025-09-30 22:29:01.657279', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:01.732114', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.2549606263637543, 'timestamp': '2025-09-30 22:29:01.743436', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:01.804514', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.09112440794706345, 'timestamp': '2025-09-30 22:29:01.811935', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:01.871949', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.1281379759311676, 'timestamp': '2025-09-30 22:29:01.876176', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:01.942690', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.12427424639463425, 'timestamp': '2025-09-30 22:29:01.946323', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:02.004424', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.1624363660812378, 'timestamp': '2025-09-30 22:29:02.012313', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:02.070229', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.11786488443613052, 'timestamp': '2025-09-30 22:29:02.074272', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:02.132699', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.19741670787334442, 'timestamp': '2025-09-30 22:29:02.144238', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:02.208189', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.1692861169576645, 'timestamp': '2025-09-30 22:29:02.229086', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:02.289299', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.13157309591770172, 'timestamp': '2025-09-30 22:29:02.299256', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.367686', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.12347279489040375, 'timestamp': '2025-09-30 22:29:02.372657', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.431536', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.09627048671245575, 'timestamp': '2025-09-30 22:29:02.445320', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:02.517622', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.11510282009840012, 'timestamp': '2025-09-30 22:29:02.524232', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:02.589336', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.11417577415704727, 'timestamp': '2025-09-30 22:29:02.598274', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.667474', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.13145871460437775, 'timestamp': '2025-09-30 22:29:02.672066', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:02.732659', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.10007483512163162, 'timestamp': '2025-09-30 22:29:02.736441', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:02.796245', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.12052562832832336, 'timestamp': '2025-09-30 22:29:02.800138', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.859621', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.10230160504579544, 'timestamp': '2025-09-30 22:29:02.866864', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.925113', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.06863082945346832, 'timestamp': '2025-09-30 22:29:02.932372', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:02.991802', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.13766072690486908, 'timestamp': '2025-09-30 22:29:02.995263', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:03.053665', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.12388354539871216, 'timestamp': '2025-09-30 22:29:03.066631', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:03.125541', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.2921951711177826, 'timestamp': '2025-09-30 22:29:03.138590', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:29:03.197237', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.08212682604789734, 'timestamp': '2025-09-30 22:29:03.201034', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:03.274732', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.1527346968650818, 'timestamp': '2025-09-30 22:29:03.279549', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:03.350387', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.12076026946306229, 'timestamp': '2025-09-30 22:29:03.356019', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:03.415221', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.10926678776741028, 'timestamp': '2025-09-30 22:29:03.423111', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:03.489180', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.16970080137252808, 'timestamp': '2025-09-30 22:29:03.492603', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:03.551799', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.09475476294755936, 'timestamp': '2025-09-30 22:29:03.555211', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:03.622558', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.07603359967470169, 'timestamp': '2025-09-30 22:29:03.626087', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:03.689084', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.14569644629955292, 'timestamp': '2025-09-30 22:29:03.695968', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:03.755799', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.11216235905885696, 'timestamp': '2025-09-30 22:29:03.759817', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:03.817957', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.11524458974599838, 'timestamp': '2025-09-30 22:29:03.825892', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:03.886015', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.1287306547164917, 'timestamp': '2025-09-30 22:29:03.890788', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:03.951506', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.10912828147411346, 'timestamp': '2025-09-30 22:29:03.961011', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:04.019007', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.10036608576774597, 'timestamp': '2025-09-30 22:29:04.035872', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.095117', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.17309284210205078, 'timestamp': '2025-09-30 22:29:04.098541', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.157715', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.18691220879554749, 'timestamp': '2025-09-30 22:29:04.166190', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:04.226047', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.12378782033920288, 'timestamp': '2025-09-30 22:29:04.234882', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:04.293101', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.06682365387678146, 'timestamp': '2025-09-30 22:29:04.300692', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:04.361049', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.12651097774505615, 'timestamp': '2025-09-30 22:29:04.374469', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.441884', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.23038645088672638, 'timestamp': '2025-09-30 22:29:04.446641', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.522141', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.11226937174797058, 'timestamp': '2025-09-30 22:29:04.529536', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.593228', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.12366435676813126, 'timestamp': '2025-09-30 22:29:04.603517', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:04.674677', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.12959203124046326, 'timestamp': '2025-09-30 22:29:04.682258', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:04.745105', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.11692876368761063, 'timestamp': '2025-09-30 22:29:04.748265', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:04.823690', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.20348620414733887, 'timestamp': '2025-09-30 22:29:04.831015', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:04.888391', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.10745468735694885, 'timestamp': '2025-09-30 22:29:04.902866', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:04.964292', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.11851544678211212, 'timestamp': '2025-09-30 22:29:04.968611', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:05.028286', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.17733487486839294, 'timestamp': '2025-09-30 22:29:05.034138', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:05.103631', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.07563913613557816, 'timestamp': '2025-09-30 22:29:05.110385', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:29:05.187036', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.09282480180263519, 'timestamp': '2025-09-30 22:29:05.191221', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:05.253746', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.10566561669111252, 'timestamp': '2025-09-30 22:29:05.258999', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:05.328232', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.21382582187652588, 'timestamp': '2025-09-30 22:29:05.341443', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:05.413425', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.09820172190666199, 'timestamp': '2025-09-30 22:29:05.425442', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:05.506087', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.12899640202522278, 'timestamp': '2025-09-30 22:29:05.512873', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:05.573143', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.12708692252635956, 'timestamp': '2025-09-30 22:29:05.576688', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:05.639643', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.10990384966135025, 'timestamp': '2025-09-30 22:29:05.657474', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:05.730414', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.10508488118648529, 'timestamp': '2025-09-30 22:29:05.736896', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:05.800814', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.16418318450450897, 'timestamp': '2025-09-30 22:29:05.804310', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:05.862863', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.15373112261295319, 'timestamp': '2025-09-30 22:29:05.875301', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:05.940547', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.09721165895462036, 'timestamp': '2025-09-30 22:29:05.944368', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:06.012114', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.13331012427806854, 'timestamp': '2025-09-30 22:29:06.028268', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:06.114754', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.15328364074230194, 'timestamp': '2025-09-30 22:29:06.117921', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:06.175925', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.04745525121688843, 'timestamp': '2025-09-30 22:29:06.182828', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:06.240873', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.21321538090705872, 'timestamp': '2025-09-30 22:29:06.247436', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:06.306225', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.18193195760250092, 'timestamp': '2025-09-30 22:29:06.322616', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-09-30 22:29:06.786936', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:06.844537', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.17848117649555206, 'timestamp': '2025-09-30 22:29:06.849744', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:06.915422', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.15740589797496796, 'timestamp': '2025-09-30 22:29:06.921175', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:06.981523', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.1044485792517662, 'timestamp': '2025-09-30 22:29:06.984779', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.042846', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.14152511954307556, 'timestamp': '2025-09-30 22:29:07.049731', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.107880', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.14013972878456116, 'timestamp': '2025-09-30 22:29:07.111631', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.169198', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.21923667192459106, 'timestamp': '2025-09-30 22:29:07.182170', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:29:07.243130', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.13130149245262146, 'timestamp': '2025-09-30 22:29:07.256045', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:07.315312', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.068161241710186, 'timestamp': '2025-09-30 22:29:07.322358', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:07.379476', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.04918305203318596, 'timestamp': '2025-09-30 22:29:07.383614', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.443545', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.15375328063964844, 'timestamp': '2025-09-30 22:29:07.448640', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:07.507145', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.10824108868837357, 'timestamp': '2025-09-30 22:29:07.513977', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:07.571340', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.0822984054684639, 'timestamp': '2025-09-30 22:29:07.578013', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.635107', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.08075160533189774, 'timestamp': '2025-09-30 22:29:07.638303', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:07.707274', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.08267293125391006, 'timestamp': '2025-09-30 22:29:07.710500', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:07.776050', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.1346425712108612, 'timestamp': '2025-09-30 22:29:07.787969', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:07.846600', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.1383044719696045, 'timestamp': '2025-09-30 22:29:07.854178', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:07.912941', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.1267746090888977, 'timestamp': '2025-09-30 22:29:07.916270', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:07.983804', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.15015046298503876, 'timestamp': '2025-09-30 22:29:07.986873', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:08.045136', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.22025254368782043, 'timestamp': '2025-09-30 22:29:08.049162', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:08.107472', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.03599032759666443, 'timestamp': '2025-09-30 22:29:08.114702', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:08.172183', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.17518971860408783, 'timestamp': '2025-09-30 22:29:08.175372', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:08.245065', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.17215672135353088, 'timestamp': '2025-09-30 22:29:08.247856', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:08.305237', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.1068863570690155, 'timestamp': '2025-09-30 22:29:08.308094', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:08.369269', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.09016396850347519, 'timestamp': '2025-09-30 22:29:08.375801', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:08.440551', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.09898345917463303, 'timestamp': '2025-09-30 22:29:08.443733', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:08.503953', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.14533232152462006, 'timestamp': '2025-09-30 22:29:08.506842', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:08.566871', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.050960563123226166, 'timestamp': '2025-09-30 22:29:08.570912', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:08.631282', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.07334985584020615, 'timestamp': '2025-09-30 22:29:08.638430', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:08.703403', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.15880365669727325, 'timestamp': '2025-09-30 22:29:08.717398', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:08.779934', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.1594112515449524, 'timestamp': '2025-09-30 22:29:08.784396', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:08.845338', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.13762252032756805, 'timestamp': '2025-09-30 22:29:08.851149', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:08.909610', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.14050473272800446, 'timestamp': '2025-09-30 22:29:08.915770', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:08.980721', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.12643374502658844, 'timestamp': '2025-09-30 22:29:08.983463', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:09.044543', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.2184944450855255, 'timestamp': '2025-09-30 22:29:09.046926', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:09.103984', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.15868568420410156, 'timestamp': '2025-09-30 22:29:09.106306', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.164801', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.17136003077030182, 'timestamp': '2025-09-30 22:29:09.171007', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:09.227478', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.1653619408607483, 'timestamp': '2025-09-30 22:29:09.230569', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:09.293112', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.14865033328533173, 'timestamp': '2025-09-30 22:29:09.295976', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:09.355134', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.0727347731590271, 'timestamp': '2025-09-30 22:29:09.367002', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.426239', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.0644325390458107, 'timestamp': '2025-09-30 22:29:09.433305', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:09.490428', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.10288819670677185, 'timestamp': '2025-09-30 22:29:09.494043', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:09.564594', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.09646014124155045, 'timestamp': '2025-09-30 22:29:09.567617', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.628703', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.07303618639707565, 'timestamp': '2025-09-30 22:29:09.632050', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:09.704390', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.1447492241859436, 'timestamp': '2025-09-30 22:29:09.711325', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.769797', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.19839370250701904, 'timestamp': '2025-09-30 22:29:09.772440', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:09.831260', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.21766632795333862, 'timestamp': '2025-09-30 22:29:09.840899', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.917968', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.06900203227996826, 'timestamp': '2025-09-30 22:29:09.920319', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:09.987902', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.0990261659026146, 'timestamp': '2025-09-30 22:29:09.999987', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.056876', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.13603797554969788, 'timestamp': '2025-09-30 22:29:10.072085', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.130728', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.16602467000484467, 'timestamp': '2025-09-30 22:29:10.133249', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.190249', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.0816546380519867, 'timestamp': '2025-09-30 22:29:10.194040', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:10.251438', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.08845098316669464, 'timestamp': '2025-09-30 22:29:10.262780', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:10.343439', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.07036665827035904, 'timestamp': '2025-09-30 22:29:10.347736', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.406549', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.15964487195014954, 'timestamp': '2025-09-30 22:29:10.415674', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:10.477228', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.1264844387769699, 'timestamp': '2025-09-30 22:29:10.481207', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:10.540539', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.06065593287348747, 'timestamp': '2025-09-30 22:29:10.547991', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:10.607420', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.17690090835094452, 'timestamp': '2025-09-30 22:29:10.611741', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:10.674783', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.06900419294834137, 'timestamp': '2025-09-30 22:29:10.679806', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.740136', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.09966929256916046, 'timestamp': '2025-09-30 22:29:10.747802', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:10.818079', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.1922854781150818, 'timestamp': '2025-09-30 22:29:10.824745', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.883314', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.15388838946819305, 'timestamp': '2025-09-30 22:29:10.888736', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:10.949099', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.06706219166517258, 'timestamp': '2025-09-30 22:29:10.951467', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:11.013719', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.06886812299489975, 'timestamp': '2025-09-30 22:29:11.016162', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:11.073807', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.13159440457820892, 'timestamp': '2025-09-30 22:29:11.079648', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.142569', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.07393785566091537, 'timestamp': '2025-09-30 22:29:11.145065', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:11.207255', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.07474220544099808, 'timestamp': '2025-09-30 22:29:11.211208', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.272046', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.22991502285003662, 'timestamp': '2025-09-30 22:29:11.275443', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.339177', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.2248244285583496, 'timestamp': '2025-09-30 22:29:11.349359', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.406230', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.12250367552042007, 'timestamp': '2025-09-30 22:29:11.410297', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:11.491528', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.11455317586660385, 'timestamp': '2025-09-30 22:29:11.494361', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.553274', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.22032508254051208, 'timestamp': '2025-09-30 22:29:11.556416', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:11.619166', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.17021824419498444, 'timestamp': '2025-09-30 22:29:11.629439', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:11.691041', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.11397074162960052, 'timestamp': '2025-09-30 22:29:11.693618', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:11.773377', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.1927115023136139, 'timestamp': '2025-09-30 22:29:11.775730', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:11.836245', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.06017589196562767, 'timestamp': '2025-09-30 22:29:11.839016', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:11.902819', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.17170725762844086, 'timestamp': '2025-09-30 22:29:11.909122', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:11.966658', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.08290830254554749, 'timestamp': '2025-09-30 22:29:11.971199', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.051952', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.12210830301046371, 'timestamp': '2025-09-30 22:29:12.055045', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:12.128526', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.13621145486831665, 'timestamp': '2025-09-30 22:29:12.131213', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.195876', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.15578438341617584, 'timestamp': '2025-09-30 22:29:12.202084', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:12.271201', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.15642112493515015, 'timestamp': '2025-09-30 22:29:12.276734', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:12.349659', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.0885169729590416, 'timestamp': '2025-09-30 22:29:12.352779', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.413337', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.10073664039373398, 'timestamp': '2025-09-30 22:29:12.421063', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:12.487163', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.19244834780693054, 'timestamp': '2025-09-30 22:29:12.496026', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:12.555850', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.13208939135074615, 'timestamp': '2025-09-30 22:29:12.559097', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:12.627401', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.057344045490026474, 'timestamp': '2025-09-30 22:29:12.631060', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:12.699407', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.06559612601995468, 'timestamp': '2025-09-30 22:29:12.704994', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.762117', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.08352141082286835, 'timestamp': '2025-09-30 22:29:12.768584', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.826267', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.1828385889530182, 'timestamp': '2025-09-30 22:29:12.842109', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:12.903684', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.16665412485599518, 'timestamp': '2025-09-30 22:29:12.907841', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:12.967265', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.1590089350938797, 'timestamp': '2025-09-30 22:29:12.970301', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:13.051640', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.19212046265602112, 'timestamp': '2025-09-30 22:29:13.060318', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:13.133190', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.1862032562494278, 'timestamp': '2025-09-30 22:29:13.137875', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:13.204368', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.14954936504364014, 'timestamp': '2025-09-30 22:29:13.206989', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:13.264451', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.07278429716825485, 'timestamp': '2025-09-30 22:29:13.267277', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:13.324136', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.17110669612884521, 'timestamp': '2025-09-30 22:29:13.331326', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:13.402498', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.14415554702281952, 'timestamp': '2025-09-30 22:29:13.405833', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:13.464303', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.1349565088748932, 'timestamp': '2025-09-30 22:29:13.467451', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:13.528362', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.1503305584192276, 'timestamp': '2025-09-30 22:29:13.531909', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:13.589398', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.16214853525161743, 'timestamp': '2025-09-30 22:29:13.597355', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:13.655315', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.12999723851680756, 'timestamp': '2025-09-30 22:29:13.660493', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:13.719376', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.12275867164134979, 'timestamp': '2025-09-30 22:29:13.722160', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:13.781676', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.11139322072267532, 'timestamp': '2025-09-30 22:29:13.796469', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:13.853312', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.10206495225429535, 'timestamp': '2025-09-30 22:29:13.859407', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:13.921443', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.09518563002347946, 'timestamp': '2025-09-30 22:29:13.926032', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:13.984303', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.10291565954685211, 'timestamp': '2025-09-30 22:29:13.986669', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:14.044427', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.08403823524713516, 'timestamp': '2025-09-30 22:29:14.046966', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:14.104601', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.19491037726402283, 'timestamp': '2025-09-30 22:29:14.112415', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:14.169507', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.1361982673406601, 'timestamp': '2025-09-30 22:29:14.172541', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:14.231721', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.2522987723350525, 'timestamp': '2025-09-30 22:29:14.233985', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.293146', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.11402285099029541, 'timestamp': '2025-09-30 22:29:14.296587', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-30 22:29:14.358460', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.12016837298870087, 'timestamp': '2025-09-30 22:29:14.369594', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:14.429405', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.0914461687207222, 'timestamp': '2025-09-30 22:29:14.432485', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.489224', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.131954163312912, 'timestamp': '2025-09-30 22:29:14.493761', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.562200', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.0640726387500763, 'timestamp': '2025-09-30 22:29:14.565333', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.623171', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.12448454648256302, 'timestamp': '2025-09-30 22:29:14.629129', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:14.693649', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.17161689698696136, 'timestamp': '2025-09-30 22:29:14.696270', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.760199', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.0876225158572197, 'timestamp': '2025-09-30 22:29:14.762541', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:14.829207', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.089203380048275, 'timestamp': '2025-09-30 22:29:14.831479', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:14.888248', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.25104284286499023, 'timestamp': '2025-09-30 22:29:14.894081', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:14.968507', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.15647172927856445, 'timestamp': '2025-09-30 22:29:14.971029', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:15.028112', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.15694762766361237, 'timestamp': '2025-09-30 22:29:15.030733', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:15.104934', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.21936337649822235, 'timestamp': '2025-09-30 22:29:15.108378', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:15.181542', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.12760576605796814, 'timestamp': '2025-09-30 22:29:15.187907', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:29:15.245098', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.11221501976251602, 'timestamp': '2025-09-30 22:29:15.249052', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:15.324215', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.1572614312171936, 'timestamp': '2025-09-30 22:29:15.327851', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:15.389159', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.15421055257320404, 'timestamp': '2025-09-30 22:29:15.395328', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:15.453538', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.0978146344423294, 'timestamp': '2025-09-30 22:29:15.460818', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:15.520860', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.07024779915809631, 'timestamp': '2025-09-30 22:29:15.523746', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:15.596107', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.12726731598377228, 'timestamp': '2025-09-30 22:29:15.599022', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:15.657293', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.10211104899644852, 'timestamp': '2025-09-30 22:29:15.660430', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:15.717404', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.15089987218379974, 'timestamp': '2025-09-30 22:29:15.723960', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:15.780944', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.08650701493024826, 'timestamp': '2025-09-30 22:29:15.784189', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:15.844877', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.1411798596382141, 'timestamp': '2025-09-30 22:29:15.847312', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:15.917247', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.24292387068271637, 'timestamp': '2025-09-30 22:29:15.921498', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:15.982783', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.046566739678382874, 'timestamp': '2025-09-30 22:29:15.992655', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:16.051056', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.132639542222023, 'timestamp': '2025-09-30 22:29:16.053438', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:16.121786', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.1172042191028595, 'timestamp': '2025-09-30 22:29:16.124180', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.182604', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.1210714727640152, 'timestamp': '2025-09-30 22:29:16.185163', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.244063', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.18550780415534973, 'timestamp': '2025-09-30 22:29:16.249788', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:29:16.315423', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.11006001383066177, 'timestamp': '2025-09-30 22:29:16.328625', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.398482', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.09138898551464081, 'timestamp': '2025-09-30 22:29:16.401241', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.459961', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.14025478065013885, 'timestamp': '2025-09-30 22:29:16.462746', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.535968', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.11566229909658432, 'timestamp': '2025-09-30 22:29:16.543088', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:16.607260', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.11134657263755798, 'timestamp': '2025-09-30 22:29:16.618783', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.695769', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.28320276737213135, 'timestamp': '2025-09-30 22:29:16.698094', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:16.762240', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.17663048207759857, 'timestamp': '2025-09-30 22:29:16.764954', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:16.835811', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.12272339314222336, 'timestamp': '2025-09-30 22:29:16.841724', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:16.900668', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.14128899574279785, 'timestamp': '2025-09-30 22:29:16.902948', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:16.960832', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.1637175977230072, 'timestamp': '2025-09-30 22:29:16.963858', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:17.031148', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.15198010206222534, 'timestamp': '2025-09-30 22:29:17.033659', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:17.090763', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.13397027552127838, 'timestamp': '2025-09-30 22:29:17.097231', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:17.155461', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.09018044173717499, 'timestamp': '2025-09-30 22:29:17.157903', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.225405', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.20291119813919067, 'timestamp': '2025-09-30 22:29:17.227921', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.313186', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.20598886907100677, 'timestamp': '2025-09-30 22:29:17.319591', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.381498', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.09030763804912567, 'timestamp': '2025-09-30 22:29:17.388820', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:29:17.457719', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.04292802885174751, 'timestamp': '2025-09-30 22:29:17.461235', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.519235', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.1349538117647171, 'timestamp': '2025-09-30 22:29:17.521931', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.579800', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.09306841343641281, 'timestamp': '2025-09-30 22:29:17.582799', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:17.641406', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.05426448583602905, 'timestamp': '2025-09-30 22:29:17.650695', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:17.707565', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.17098718881607056, 'timestamp': '2025-09-30 22:29:17.709852', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:17.769384', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.12266135960817337, 'timestamp': '2025-09-30 22:29:17.772635', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:17.830069', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.13624589145183563, 'timestamp': '2025-09-30 22:29:17.832499', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:17.889301', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.19011226296424866, 'timestamp': '2025-09-30 22:29:17.895543', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:17.952716', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.14168040454387665, 'timestamp': '2025-09-30 22:29:17.955633', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.027564', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.08891984820365906, 'timestamp': '2025-09-30 22:29:18.030305', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.088058', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.19704008102416992, 'timestamp': '2025-09-30 22:29:18.090400', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.161230', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.16435877978801727, 'timestamp': '2025-09-30 22:29:18.166970', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:18.225315', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.13774842023849487, 'timestamp': '2025-09-30 22:29:18.228035', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:18.284788', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.07698406279087067, 'timestamp': '2025-09-30 22:29:18.287543', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:18.346537', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.16040296852588654, 'timestamp': '2025-09-30 22:29:18.349108', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:18.408309', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.0850251317024231, 'timestamp': '2025-09-30 22:29:18.414858', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.474777', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.21636971831321716, 'timestamp': '2025-09-30 22:29:18.477558', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.541631', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.17678581178188324, 'timestamp': '2025-09-30 22:29:18.543954', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:18.600401', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.1864437460899353, 'timestamp': '2025-09-30 22:29:18.602876', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.660706', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.23315617442131042, 'timestamp': '2025-09-30 22:29:18.667607', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.724828', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.08538968116044998, 'timestamp': '2025-09-30 22:29:18.727408', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:18.785887', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.12399714440107346, 'timestamp': '2025-09-30 22:29:18.788368', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:18.846871', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.1360311359167099, 'timestamp': '2025-09-30 22:29:18.851876', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:18.925300', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.11173205822706223, 'timestamp': '2025-09-30 22:29:18.936599', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:18.996806', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.1465526521205902, 'timestamp': '2025-09-30 22:29:19.000783', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:19.060978', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.05879509821534157, 'timestamp': '2025-09-30 22:29:19.063507', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:19.120345', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.1571999490261078, 'timestamp': '2025-09-30 22:29:19.127017', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:19.183372', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.16233406960964203, 'timestamp': '2025-09-30 22:29:19.189605', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:29:35.492627', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 13961.117413346154, 'timestamp': '2025-09-30 22:29:35.496329', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:35.553548', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.12543149292469025, 'timestamp': '2025-09-30 22:29:35.555843', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:35.616104', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.1676938384771347, 'timestamp': '2025-09-30 22:29:35.629109', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:35.698373', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.12793849408626556, 'timestamp': '2025-09-30 22:29:35.702036', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:35.759632', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.0993361845612526, 'timestamp': '2025-09-30 22:29:35.770133', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:35.828804', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.08346479386091232, 'timestamp': '2025-09-30 22:29:35.831373', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:35.889292', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.1624573916196823, 'timestamp': '2025-09-30 22:29:35.892808', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:35.949703', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.10789597779512405, 'timestamp': '2025-09-30 22:29:35.954948', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:36.015511', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.24153341352939606, 'timestamp': '2025-09-30 22:29:36.021423', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:36.092798', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.16595464944839478, 'timestamp': '2025-09-30 22:29:36.098399', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:36.158866', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.14339973032474518, 'timestamp': '2025-09-30 22:29:36.162768', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:36.222045', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.12456660717725754, 'timestamp': '2025-09-30 22:29:36.227350', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:36.298483', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.1607494056224823, 'timestamp': '2025-09-30 22:29:36.312271', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:36.381673', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.11619460582733154, 'timestamp': '2025-09-30 22:29:36.384346', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:36.443066', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.049584999680519104, 'timestamp': '2025-09-30 22:29:36.445536', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:36.503083', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.12527067959308624, 'timestamp': '2025-09-30 22:29:36.505862', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:36.563027', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.13764327764511108, 'timestamp': '2025-09-30 22:29:36.569633', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:36.626797', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.1521083116531372, 'timestamp': '2025-09-30 22:29:36.629640', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:36.692139', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.05792907997965813, 'timestamp': '2025-09-30 22:29:36.695290', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:36.754005', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.15048335492610931, 'timestamp': '2025-09-30 22:29:36.758640', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:36.816536', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.16267874836921692, 'timestamp': '2025-09-30 22:29:36.823629', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:36.882977', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.12689287960529327, 'timestamp': '2025-09-30 22:29:36.886791', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:36.948334', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.1017184928059578, 'timestamp': '2025-09-30 22:29:36.965141', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:37.022770', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.11760805547237396, 'timestamp': '2025-09-30 22:29:37.026932', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:37.086132', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.11439590156078339, 'timestamp': '2025-09-30 22:29:37.103076', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:37.176508', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.16658718883991241, 'timestamp': '2025-09-30 22:29:37.190649', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:37.267112', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.13285554945468903, 'timestamp': '2025-09-30 22:29:37.271042', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:37.338634', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.14129874110221863, 'timestamp': '2025-09-30 22:29:37.342039', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:37.405560', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.07251141965389252, 'timestamp': '2025-09-30 22:29:37.412817', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:37.469741', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.15144625306129456, 'timestamp': '2025-09-30 22:29:37.473259', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:37.538242', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.08922091871500015, 'timestamp': '2025-09-30 22:29:37.548599', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:37.620870', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.09610147029161453, 'timestamp': '2025-09-30 22:29:37.626655', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:37.686730', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.1345585137605667, 'timestamp': '2025-09-30 22:29:37.693923', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:37.768282', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.11724192649126053, 'timestamp': '2025-09-30 22:29:37.771459', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:29:37.830907', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.20234133303165436, 'timestamp': '2025-09-30 22:29:37.834024', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:37.896242', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.07481589168310165, 'timestamp': '2025-09-30 22:29:37.899346', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:37.958916', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.06930312514305115, 'timestamp': '2025-09-30 22:29:37.973048', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:38.037717', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.09885084629058838, 'timestamp': '2025-09-30 22:29:38.057424', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:38.117151', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.09922874718904495, 'timestamp': '2025-09-30 22:29:38.121402', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:38.184709', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.1004214733839035, 'timestamp': '2025-09-30 22:29:38.190855', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:38.273237', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.09134671092033386, 'timestamp': '2025-09-30 22:29:38.281170', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:38.363752', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.038384582847356796, 'timestamp': '2025-09-30 22:29:38.367295', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:38.426157', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.0608016736805439, 'timestamp': '2025-09-30 22:29:38.428970', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:38.489407', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.08438201993703842, 'timestamp': '2025-09-30 22:29:38.492854', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:38.552324', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.14418114721775055, 'timestamp': '2025-09-30 22:29:38.558690', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:38.642337', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.13059969246387482, 'timestamp': '2025-09-30 22:29:38.646212', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:38.708748', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.10886824131011963, 'timestamp': '2025-09-30 22:29:38.712390', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:38.771177', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.09218353778123856, 'timestamp': '2025-09-30 22:29:38.774325', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:38.839145', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.11760014295578003, 'timestamp': '2025-09-30 22:29:38.845610', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:38.919195', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.14638292789459229, 'timestamp': '2025-09-30 22:29:38.923701', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:38.990275', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.1674899160861969, 'timestamp': '2025-09-30 22:29:38.994457', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.051974', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.03248216584324837, 'timestamp': '2025-09-30 22:29:39.061792', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:39.120157', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.03888183832168579, 'timestamp': '2025-09-30 22:29:39.127056', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:39.187214', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.12144897133111954, 'timestamp': '2025-09-30 22:29:39.193546', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.255099', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.11584911495447159, 'timestamp': '2025-09-30 22:29:39.258503', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:39.316214', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.09056174755096436, 'timestamp': '2025-09-30 22:29:39.320317', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:39.378756', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.1076129600405693, 'timestamp': '2025-09-30 22:29:39.385050', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:39.449847', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.10894671827554703, 'timestamp': '2025-09-30 22:29:39.454099', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:39.516741', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.07133420556783676, 'timestamp': '2025-09-30 22:29:39.520799', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.587902', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.1291775107383728, 'timestamp': '2025-09-30 22:29:39.591635', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.650266', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.09865837544202805, 'timestamp': '2025-09-30 22:29:39.656796', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:39.713096', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.18498757481575012, 'timestamp': '2025-09-30 22:29:39.716685', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.781388', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.07154285162687302, 'timestamp': '2025-09-30 22:29:39.785328', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:39.851979', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.14620403945446014, 'timestamp': '2025-09-30 22:29:39.856813', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:29:39.928894', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.16094215214252472, 'timestamp': '2025-09-30 22:29:39.935773', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:39.995215', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.15607307851314545, 'timestamp': '2025-09-30 22:29:39.998505', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:40.060948', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.11814914643764496, 'timestamp': '2025-09-30 22:29:40.063685', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:40.128957', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.13782069087028503, 'timestamp': '2025-09-30 22:29:40.132055', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:40.189482', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.09156740456819534, 'timestamp': '2025-09-30 22:29:40.197241', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:40.262147', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.18362092971801758, 'timestamp': '2025-09-30 22:29:40.270172', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:40.336635', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.1711110919713974, 'timestamp': '2025-09-30 22:29:40.340521', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:40.398287', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.1550707072019577, 'timestamp': '2025-09-30 22:29:40.402981', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:40.472319', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.1400211602449417, 'timestamp': '2025-09-30 22:29:40.479305', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:40.548833', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.029659675434231758, 'timestamp': '2025-09-30 22:29:40.552993', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:40.619362', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.10041610151529312, 'timestamp': '2025-09-30 22:29:40.623320', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:40.682249', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.20116089284420013, 'timestamp': '2025-09-30 22:29:40.684786', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:40.742044', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.054583944380283356, 'timestamp': '2025-09-30 22:29:40.749216', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:40.805290', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.06068204343318939, 'timestamp': '2025-09-30 22:29:40.809813', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:40.879792', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.077284075319767, 'timestamp': '2025-09-30 22:29:40.883420', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:40.949968', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.0679335966706276, 'timestamp': '2025-09-30 22:29:40.964322', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.020935', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.14333467185497284, 'timestamp': '2025-09-30 22:29:41.027935', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.085965', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.08953991532325745, 'timestamp': '2025-09-30 22:29:41.088965', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.145137', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.07454826682806015, 'timestamp': '2025-09-30 22:29:41.148485', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.205712', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.14767436683177948, 'timestamp': '2025-09-30 22:29:41.209761', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:41.275185', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.04820400848984718, 'timestamp': '2025-09-30 22:29:41.282133', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:41.337797', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.15698453783988953, 'timestamp': '2025-09-30 22:29:41.341366', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:41.399868', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.1175089031457901, 'timestamp': '2025-09-30 22:29:41.402983', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:41.461515', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.10427246242761612, 'timestamp': '2025-09-30 22:29:41.464746', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:41.523665', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.17119085788726807, 'timestamp': '2025-09-30 22:29:41.536555', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.595394', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.1415502279996872, 'timestamp': '2025-09-30 22:29:41.604753', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:41.662083', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.038618698716163635, 'timestamp': '2025-09-30 22:29:41.666016', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:41.723957', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.10741633176803589, 'timestamp': '2025-09-30 22:29:41.728231', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:41.786979', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.14387966692447662, 'timestamp': '2025-09-30 22:29:41.793172', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:41.848679', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.10626721382141113, 'timestamp': '2025-09-30 22:29:41.851686', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:41.909204', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.051399555057287216, 'timestamp': '2025-09-30 22:29:41.918748', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:41.980036', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.15990102291107178, 'timestamp': '2025-09-30 22:29:41.982866', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.039772', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.12716521322727203, 'timestamp': '2025-09-30 22:29:42.046112', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.106909', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.07414814084768295, 'timestamp': '2025-09-30 22:29:42.110254', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.169671', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.14153729379177094, 'timestamp': '2025-09-30 22:29:42.175632', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.233503', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.0651426613330841, 'timestamp': '2025-09-30 22:29:42.236976', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.293835', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.17131927609443665, 'timestamp': '2025-09-30 22:29:42.300095', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.356567', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.09680693596601486, 'timestamp': '2025-09-30 22:29:42.360603', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:42.418567', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.21041955053806305, 'timestamp': '2025-09-30 22:29:42.422259', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:42.479041', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.07376118749380112, 'timestamp': '2025-09-30 22:29:42.484696', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.547134', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.17104025185108185, 'timestamp': '2025-09-30 22:29:42.553198', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:42.621856', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.09668397903442383, 'timestamp': '2025-09-30 22:29:42.625722', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.684507', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.08657259494066238, 'timestamp': '2025-09-30 22:29:42.689800', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.752478', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.09237995743751526, 'timestamp': '2025-09-30 22:29:42.758626', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:42.826876', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.15492720901966095, 'timestamp': '2025-09-30 22:29:42.834792', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.907323', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.19443243741989136, 'timestamp': '2025-09-30 22:29:42.911218', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:42.985683', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.15423813462257385, 'timestamp': '2025-09-30 22:29:42.988627', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.070067', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.15821689367294312, 'timestamp': '2025-09-30 22:29:43.074747', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.135728', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.12276419252157211, 'timestamp': '2025-09-30 22:29:43.144343', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:43.206550', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.13786062598228455, 'timestamp': '2025-09-30 22:29:43.222600', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.281023', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.07513745129108429, 'timestamp': '2025-09-30 22:29:43.285449', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:43.344238', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.12842091917991638, 'timestamp': '2025-09-30 22:29:43.348214', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:43.406451', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.12894706428050995, 'timestamp': '2025-09-30 22:29:43.412331', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:43.471859', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.17424483597278595, 'timestamp': '2025-09-30 22:29:43.479265', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.536826', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.13738755881786346, 'timestamp': '2025-09-30 22:29:43.539639', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:43.607574', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.16006873548030853, 'timestamp': '2025-09-30 22:29:43.610769', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:43.671101', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.16525496542453766, 'timestamp': '2025-09-30 22:29:43.677923', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.743910', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.2410673052072525, 'timestamp': '2025-09-30 22:29:43.747244', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:43.803832', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.1214209496974945, 'timestamp': '2025-09-30 22:29:43.806979', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:43.864312', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.22538384795188904, 'timestamp': '2025-09-30 22:29:43.868454', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:43.936910', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.20324154198169708, 'timestamp': '2025-09-30 22:29:43.944668', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.022407', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.18656368553638458, 'timestamp': '2025-09-30 22:29:44.026553', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.093151', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.11465609073638916, 'timestamp': '2025-09-30 22:29:44.100794', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:44.162160', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.13619528710842133, 'timestamp': '2025-09-30 22:29:44.166527', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.225266', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.12716613709926605, 'timestamp': '2025-09-30 22:29:44.232788', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:44.299343', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.14576120674610138, 'timestamp': '2025-09-30 22:29:44.302716', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:44.360072', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.11560133844614029, 'timestamp': '2025-09-30 22:29:44.364048', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:44.422308', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.14246509969234467, 'timestamp': '2025-09-30 22:29:44.426743', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.484253', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.18270480632781982, 'timestamp': '2025-09-30 22:29:44.499613', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.563310', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.13571283221244812, 'timestamp': '2025-09-30 22:29:44.567172', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.625993', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.12918245792388916, 'timestamp': '2025-09-30 22:29:44.630286', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:44.688216', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.08056017011404037, 'timestamp': '2025-09-30 22:29:44.692805', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:44.764948', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.07772129029035568, 'timestamp': '2025-09-30 22:29:44.771893', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.829107', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.18929558992385864, 'timestamp': '2025-09-30 22:29:44.832750', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:44.911852', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.08949413895606995, 'timestamp': '2025-09-30 22:29:44.919539', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:44.980431', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.13435474038124084, 'timestamp': '2025-09-30 22:29:44.993951', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:45.051642', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.065473772585392, 'timestamp': '2025-09-30 22:29:45.059280', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:45.119316', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.17803460359573364, 'timestamp': '2025-09-30 22:29:45.123400', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:45.186761', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.20721717178821564, 'timestamp': '2025-09-30 22:29:45.190019', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:45.253547', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.08127416670322418, 'timestamp': '2025-09-30 22:29:45.256943', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:45.319460', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.05769530311226845, 'timestamp': '2025-09-30 22:29:45.334270', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:45.410322', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.09008891135454178, 'timestamp': '2025-09-30 22:29:45.413069', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:45.471578', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.13365240395069122, 'timestamp': '2025-09-30 22:29:45.475681', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:45.548734', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.03673185408115387, 'timestamp': '2025-09-30 22:29:45.551839', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:45.621474', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.25988665223121643, 'timestamp': '2025-09-30 22:29:45.629259', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:45.684844', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.19779688119888306, 'timestamp': '2025-09-30 22:29:45.688298', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:45.750292', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.10176149010658264, 'timestamp': '2025-09-30 22:29:45.753807', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:45.810526', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.05691415071487427, 'timestamp': '2025-09-30 22:29:45.813641', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:45.886421', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.07367058843374252, 'timestamp': '2025-09-30 22:29:45.893853', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:45.950816', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.07176244258880615, 'timestamp': '2025-09-30 22:29:45.963992', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:46.028617', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.06716598570346832, 'timestamp': '2025-09-30 22:29:46.033757', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:46.093628', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.18563437461853027, 'timestamp': '2025-09-30 22:29:46.097980', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:46.157852', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.06211031228303909, 'timestamp': '2025-09-30 22:29:46.165253', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:46.236803', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.16431879997253418, 'timestamp': '2025-09-30 22:29:46.250143', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:46.308028', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.1330246478319168, 'timestamp': '2025-09-30 22:29:46.310963', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:46.380679', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.07587850838899612, 'timestamp': '2025-09-30 22:29:46.384603', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:46.445192', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.16801221668720245, 'timestamp': '2025-09-30 22:29:46.452737', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:46.518229', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.14495041966438293, 'timestamp': '2025-09-30 22:29:46.534409', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:46.597726', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.10107608139514923, 'timestamp': '2025-09-30 22:29:46.601878', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:46.680557', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.1063927561044693, 'timestamp': '2025-09-30 22:29:46.684790', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:46.742996', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.12350364774465561, 'timestamp': '2025-09-30 22:29:46.750886', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:46.808466', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.14757736027240753, 'timestamp': '2025-09-30 22:29:46.811849', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:46.868006', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.15123416483402252, 'timestamp': '2025-09-30 22:29:46.872222', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:46.937897', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.13025487959384918, 'timestamp': '2025-09-30 22:29:46.941475', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:47.019420', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.14800921082496643, 'timestamp': '2025-09-30 22:29:47.027426', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.094004', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.076941579580307, 'timestamp': '2025-09-30 22:29:47.098623', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:47.159781', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.12633682787418365, 'timestamp': '2025-09-30 22:29:47.163931', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:47.229326', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.1414772868156433, 'timestamp': '2025-09-30 22:29:47.233147', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:47.301254', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.09701769798994064, 'timestamp': '2025-09-30 22:29:47.308312', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:47.367611', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.1160053163766861, 'timestamp': '2025-09-30 22:29:47.372096', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.430687', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.12248432636260986, 'timestamp': '2025-09-30 22:29:47.434828', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:47.500889', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.09302833676338196, 'timestamp': '2025-09-30 22:29:47.504918', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:47.563315', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.15224973857402802, 'timestamp': '2025-09-30 22:29:47.571695', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.631015', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.15348045527935028, 'timestamp': '2025-09-30 22:29:47.635362', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.693654', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.07249189168214798, 'timestamp': '2025-09-30 22:29:47.709257', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.767802', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.10364975035190582, 'timestamp': '2025-09-30 22:29:47.772289', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:47.836115', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.08832734823226929, 'timestamp': '2025-09-30 22:29:47.842999', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:47.901412', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.10714707523584366, 'timestamp': '2025-09-30 22:29:47.905041', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:47.965980', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.11379320174455643, 'timestamp': '2025-09-30 22:29:47.969990', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.029266', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.14383810758590698, 'timestamp': '2025-09-30 22:29:48.039316', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.098341', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.05794050171971321, 'timestamp': '2025-09-30 22:29:48.106457', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:48.174736', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.16581036150455475, 'timestamp': '2025-09-30 22:29:48.179790', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:48.238906', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.14271610975265503, 'timestamp': '2025-09-30 22:29:48.243038', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:48.302221', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.10774748027324677, 'timestamp': '2025-09-30 22:29:48.305164', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.362669', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.12114518135786057, 'timestamp': '2025-09-30 22:29:48.370936', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:48.429657', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.10576451569795609, 'timestamp': '2025-09-30 22:29:48.433461', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:48.504834', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.0976373627781868, 'timestamp': '2025-09-30 22:29:48.516840', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:48.576835', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.1610800325870514, 'timestamp': '2025-09-30 22:29:48.579789', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:48.639100', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.05575094372034073, 'timestamp': '2025-09-30 22:29:48.646819', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.703612', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.15779271721839905, 'timestamp': '2025-09-30 22:29:48.708157', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.774902', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.19943536818027496, 'timestamp': '2025-09-30 22:29:48.788472', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:48.847607', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.2186402529478073, 'timestamp': '2025-09-30 22:29:48.851547', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:48.928578', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.15886330604553223, 'timestamp': '2025-09-30 22:29:48.936267', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:48.993128', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.08340363949537277, 'timestamp': '2025-09-30 22:29:48.996790', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.056999', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.1434190273284912, 'timestamp': '2025-09-30 22:29:49.061697', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:49.121607', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.12588125467300415, 'timestamp': '2025-09-30 22:29:49.129373', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:49.187999', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.09642380475997925, 'timestamp': '2025-09-30 22:29:49.196609', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.256980', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.06230311840772629, 'timestamp': '2025-09-30 22:29:49.262814', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.328994', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.15112648904323578, 'timestamp': '2025-09-30 22:29:49.333195', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:49.390518', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.11673102527856827, 'timestamp': '2025-09-30 22:29:49.400621', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.473908', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.12016943842172623, 'timestamp': '2025-09-30 22:29:49.480336', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.549124', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.08404635637998581, 'timestamp': '2025-09-30 22:29:49.554499', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:49.612593', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.07257161289453506, 'timestamp': '2025-09-30 22:29:49.615370', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:49.673254', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.1810145527124405, 'timestamp': '2025-09-30 22:29:49.678868', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:49.738282', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.15067821741104126, 'timestamp': '2025-09-30 22:29:49.746340', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:49.817747', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.12553369998931885, 'timestamp': '2025-09-30 22:29:49.834094', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:49.893466', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.12829194962978363, 'timestamp': '2025-09-30 22:29:49.906126', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:49.963922', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.1767619103193283, 'timestamp': '2025-09-30 22:29:49.967821', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:50.028288', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.05892442166805267, 'timestamp': '2025-09-30 22:29:50.037291', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:50.110454', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.16721943020820618, 'timestamp': '2025-09-30 22:29:50.116323', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:50.191026', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.11685706675052643, 'timestamp': '2025-09-30 22:29:50.195784', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:50.256115', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.16497407853603363, 'timestamp': '2025-09-30 22:29:50.262706', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:50.322995', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.12938690185546875, 'timestamp': '2025-09-30 22:29:50.331426', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:50.390099', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.14406681060791016, 'timestamp': '2025-09-30 22:29:50.404639', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:50.476251', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.1744387447834015, 'timestamp': '2025-09-30 22:29:50.490621', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:50.561048', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.1578378826379776, 'timestamp': '2025-09-30 22:29:50.565151', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:50.633407', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.1408836990594864, 'timestamp': '2025-09-30 22:29:50.649143', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:50.740723', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.16285252571105957, 'timestamp': '2025-09-30 22:29:50.752272', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:50.832997', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.16739672422409058, 'timestamp': '2025-09-30 22:29:50.836343', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:50.893841', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.11697273701429367, 'timestamp': '2025-09-30 22:29:50.897412', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:50.956707', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.16177494823932648, 'timestamp': '2025-09-30 22:29:50.963027', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:51.019635', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.10826128721237183, 'timestamp': '2025-09-30 22:29:51.031120', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.091057', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.10967759788036346, 'timestamp': '2025-09-30 22:29:51.094674', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.166647', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.24769870936870575, 'timestamp': '2025-09-30 22:29:51.170854', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:51.237303', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.10827935487031937, 'timestamp': '2025-09-30 22:29:51.244669', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.314259', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.14729420840740204, 'timestamp': '2025-09-30 22:29:51.317821', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:51.375121', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.08081132173538208, 'timestamp': '2025-09-30 22:29:51.378421', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:51.448114', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.09057848155498505, 'timestamp': '2025-09-30 22:29:51.452230', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.520413', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.07098940014839172, 'timestamp': '2025-09-30 22:29:51.527000', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:51.583835', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.11743754893541336, 'timestamp': '2025-09-30 22:29:51.588498', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:51.667406', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.14177565276622772, 'timestamp': '2025-09-30 22:29:51.684827', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.761902', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.13490134477615356, 'timestamp': '2025-09-30 22:29:51.776349', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:51.855838', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.05446689575910568, 'timestamp': '2025-09-30 22:29:51.864496', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:51.924202', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.18269525468349457, 'timestamp': '2025-09-30 22:29:51.928974', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:51.988517', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.11449813842773438, 'timestamp': '2025-09-30 22:29:52.005968', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:52.067151', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.14718776941299438, 'timestamp': '2025-09-30 22:29:52.071161', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:52.145002', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.10323421657085419, 'timestamp': '2025-09-30 22:29:52.152663', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:52.210850', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.09991557151079178, 'timestamp': '2025-09-30 22:29:52.216600', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:52.275345', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.17990995943546295, 'timestamp': '2025-09-30 22:29:52.281205', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:52.339529', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.07102605700492859, 'timestamp': '2025-09-30 22:29:52.342567', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:52.399635', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.12329614162445068, 'timestamp': '2025-09-30 22:29:52.406935', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:52.480874', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.2089565545320511, 'timestamp': '2025-09-30 22:29:52.485691', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:52.542413', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.1834680736064911, 'timestamp': '2025-09-30 22:29:52.545750', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:29:52.605852', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.08686553686857224, 'timestamp': '2025-09-30 22:29:52.609508', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:52.666123', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.12553814053535461, 'timestamp': '2025-09-30 22:29:52.672739', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:52.739700', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.160904660820961, 'timestamp': '2025-09-30 22:29:52.742786', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:52.799512', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.13321353495121002, 'timestamp': '2025-09-30 22:29:52.802593', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:52.860172', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.14497368037700653, 'timestamp': '2025-09-30 22:29:52.868679', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:52.927230', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.10340239107608795, 'timestamp': '2025-09-30 22:29:52.933399', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:52.996788', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.1359931230545044, 'timestamp': '2025-09-30 22:29:53.000156', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:53.057820', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.10556408017873764, 'timestamp': '2025-09-30 22:29:53.061116', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:53.122559', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.1371629536151886, 'timestamp': '2025-09-30 22:29:53.133787', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:53.193714', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.14457456767559052, 'timestamp': '2025-09-30 22:29:53.201828', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:53.258854', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.0747184231877327, 'timestamp': '2025-09-30 22:29:53.263173', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:29:53.329054', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.12678104639053345, 'timestamp': '2025-09-30 22:29:53.332566', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:53.393526', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.11004910618066788, 'timestamp': '2025-09-30 22:29:53.396777', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:53.491524', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.06865745782852173, 'timestamp': '2025-09-30 22:29:53.498590', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:53.568159', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.14138728380203247, 'timestamp': '2025-09-30 22:29:53.571783', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:53.634927', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.05583994835615158, 'timestamp': '2025-09-30 22:29:53.638660', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:53.697202', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.09740433841943741, 'timestamp': '2025-09-30 22:29:53.702019', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:53.772277', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.062266554683446884, 'timestamp': '2025-09-30 22:29:53.779123', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:53.837724', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.12702332437038422, 'timestamp': '2025-09-30 22:29:53.842508', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:29:53.901321', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.10779173672199249, 'timestamp': '2025-09-30 22:29:53.906093', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:53.964456', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.12950362265110016, 'timestamp': '2025-09-30 22:29:53.968377', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:54.028726', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.08474422246217728, 'timestamp': '2025-09-30 22:29:54.036012', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:54.095615', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.07140804827213287, 'timestamp': '2025-09-30 22:29:54.109149', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:54.177409', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.0917770266532898, 'timestamp': '2025-09-30 22:29:54.181904', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:54.254754', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.09994599223136902, 'timestamp': '2025-09-30 22:29:54.258883', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:54.318583', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.16245073080062866, 'timestamp': '2025-09-30 22:29:54.326081', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:54.384012', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.07971960306167603, 'timestamp': '2025-09-30 22:29:54.388748', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:54.456515', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.2115432769060135, 'timestamp': '2025-09-30 22:29:54.462570', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:54.520132', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.12744630873203278, 'timestamp': '2025-09-30 22:29:54.525084', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:54.598289', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.16796563565731049, 'timestamp': '2025-09-30 22:29:54.614146', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:54.672276', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.1225595697760582, 'timestamp': '2025-09-30 22:29:54.677768', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:54.734568', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.09424030780792236, 'timestamp': '2025-09-30 22:29:54.739097', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:54.799556', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.09723560512065887, 'timestamp': '2025-09-30 22:29:54.803679', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:54.888097', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.1032380536198616, 'timestamp': '2025-09-30 22:29:54.896463', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:54.954917', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.18037497997283936, 'timestamp': '2025-09-30 22:29:54.967322', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:55.026295', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.18315307796001434, 'timestamp': '2025-09-30 22:29:55.030414', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:55.105976', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.1543714553117752, 'timestamp': '2025-09-30 22:29:55.110727', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:55.181876', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.12222558259963989, 'timestamp': '2025-09-30 22:29:55.190274', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:55.271009', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.19872437417507172, 'timestamp': '2025-09-30 22:29:55.275252', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:55.344956', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.09461046755313873, 'timestamp': '2025-09-30 22:29:55.348685', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:55.408367', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.11617053300142288, 'timestamp': '2025-09-30 22:29:55.418161', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:55.478822', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.08395163714885712, 'timestamp': '2025-09-30 22:29:55.485472', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:55.547496', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.2610483467578888, 'timestamp': '2025-09-30 22:29:55.554267', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:55.616077', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.09556782245635986, 'timestamp': '2025-09-30 22:29:55.628432', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:55.698361', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.12194982916116714, 'timestamp': '2025-09-30 22:29:55.701784', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:55.784590', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.1423725038766861, 'timestamp': '2025-09-30 22:29:55.798311', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:55.882649', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.06280925869941711, 'timestamp': '2025-09-30 22:29:55.897874', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:55.967364', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.18045949935913086, 'timestamp': '2025-09-30 22:29:55.978130', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.037962', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.13934937119483948, 'timestamp': '2025-09-30 22:29:56.041848', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.112214', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.07277970761060715, 'timestamp': '2025-09-30 22:29:56.128963', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:56.200836', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.11335781961679459, 'timestamp': '2025-09-30 22:29:56.207216', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:56.278617', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.2666485607624054, 'timestamp': '2025-09-30 22:29:56.288894', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.361714', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.10986512899398804, 'timestamp': '2025-09-30 22:29:56.367840', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:56.438046', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.1403542309999466, 'timestamp': '2025-09-30 22:29:56.447709', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.508496', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.06305378675460815, 'timestamp': '2025-09-30 22:29:56.514822', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:56.603671', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.12564513087272644, 'timestamp': '2025-09-30 22:29:56.611306', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.695079', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.08356516808271408, 'timestamp': '2025-09-30 22:29:56.698675', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:56.778159', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.1027829572558403, 'timestamp': '2025-09-30 22:29:56.785886', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:56.851051', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.16750261187553406, 'timestamp': '2025-09-30 22:29:56.876879', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:56.961133', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.12985844910144806, 'timestamp': '2025-09-30 22:29:56.965501', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:57.031742', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.10679736733436584, 'timestamp': '2025-09-30 22:29:57.034171', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:57.101629', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.12468110024929047, 'timestamp': '2025-09-30 22:29:57.112676', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:57.193273', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.1588180959224701, 'timestamp': '2025-09-30 22:29:57.196945', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:57.271286', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.11468269675970078, 'timestamp': '2025-09-30 22:29:57.276029', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:57.353168', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.1303461194038391, 'timestamp': '2025-09-30 22:29:57.356039', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:57.435972', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.15444107353687286, 'timestamp': '2025-09-30 22:29:57.450811', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:57.536397', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.15429067611694336, 'timestamp': '2025-09-30 22:29:57.545363', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:57.611439', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.07459091395139694, 'timestamp': '2025-09-30 22:29:57.615213', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:57.685920', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.09874080121517181, 'timestamp': '2025-09-30 22:29:57.688822', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:57.770373', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.06605174392461777, 'timestamp': '2025-09-30 22:29:57.784354', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-09-30 22:29:58.267114', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.329266', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.13334514200687408, 'timestamp': '2025-09-30 22:29:58.332262', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.391566', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.08189679682254791, 'timestamp': '2025-09-30 22:29:58.394277', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.454202', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.16070684790611267, 'timestamp': '2025-09-30 22:29:58.465676', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:58.533423', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.09994644671678543, 'timestamp': '2025-09-30 22:29:58.540635', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:58.598622', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.10429602861404419, 'timestamp': '2025-09-30 22:29:58.601908', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.673066', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.05387267842888832, 'timestamp': '2025-09-30 22:29:58.676777', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:58.735329', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.10043308138847351, 'timestamp': '2025-09-30 22:29:58.742973', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:58.806891', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.19154879450798035, 'timestamp': '2025-09-30 22:29:58.813313', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.870536', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.07011450082063675, 'timestamp': '2025-09-30 22:29:58.873447', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:58.933120', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.17549170553684235, 'timestamp': '2025-09-30 22:29:58.936409', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.005295', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.08414991199970245, 'timestamp': '2025-09-30 22:29:59.020328', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:59.079087', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.14842748641967773, 'timestamp': '2025-09-30 22:29:59.085426', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:59.142345', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.06189214810729027, 'timestamp': '2025-09-30 22:29:59.145848', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.204896', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.16466239094734192, 'timestamp': '2025-09-30 22:29:59.208143', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:29:59.265830', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.11261742562055588, 'timestamp': '2025-09-30 22:29:59.276104', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.340341', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.09927642345428467, 'timestamp': '2025-09-30 22:29:59.347119', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.403780', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.18059135973453522, 'timestamp': '2025-09-30 22:29:59.406578', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.462933', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.1597873717546463, 'timestamp': '2025-09-30 22:29:59.465484', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:29:59.522412', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.1468965858221054, 'timestamp': '2025-09-30 22:29:59.529336', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.596471', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.16155177354812622, 'timestamp': '2025-09-30 22:29:59.603859', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:29:59.660311', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.18613024055957794, 'timestamp': '2025-09-30 22:29:59.663788', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:59.729568', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.14356699585914612, 'timestamp': '2025-09-30 22:29:59.732686', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.790095', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.1516418308019638, 'timestamp': '2025-09-30 22:29:59.795010', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:29:59.851436', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.2103044092655182, 'timestamp': '2025-09-30 22:29:59.857623', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.922095', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.0900280773639679, 'timestamp': '2025-09-30 22:29:59.926431', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:29:59.995357', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.14871618151664734, 'timestamp': '2025-09-30 22:29:59.998365', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:00.060892', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.10643991827964783, 'timestamp': '2025-09-30 22:30:00.068809', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.131347', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.10886551439762115, 'timestamp': '2025-09-30 22:30:00.137913', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:00.194972', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.14872051775455475, 'timestamp': '2025-09-30 22:30:00.198002', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:00.263126', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.0896439403295517, 'timestamp': '2025-09-30 22:30:00.273738', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.344772', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.08836433291435242, 'timestamp': '2025-09-30 22:30:00.347589', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.413947', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.11323381215333939, 'timestamp': '2025-09-30 22:30:00.434386', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.492929', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.12745089828968048, 'timestamp': '2025-09-30 22:30:00.501601', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:00.559302', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.06805123388767242, 'timestamp': '2025-09-30 22:30:00.572267', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:00.630135', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.13439203798770905, 'timestamp': '2025-09-30 22:30:00.633244', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:00.698206', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.14574339985847473, 'timestamp': '2025-09-30 22:30:00.710287', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:00.766245', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.16417232155799866, 'timestamp': '2025-09-30 22:30:00.770251', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.837971', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.10253265500068665, 'timestamp': '2025-09-30 22:30:00.843528', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.913195', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.07053211331367493, 'timestamp': '2025-09-30 22:30:00.936089', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:00.995520', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.2077503204345703, 'timestamp': '2025-09-30 22:30:01.004996', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.065422', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.11090931296348572, 'timestamp': '2025-09-30 22:30:01.080664', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.141581', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.1522173136472702, 'timestamp': '2025-09-30 22:30:01.155617', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.223622', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.06660587340593338, 'timestamp': '2025-09-30 22:30:01.236239', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:01.303310', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.12248532474040985, 'timestamp': '2025-09-30 22:30:01.310318', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:01.378087', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.14833582937717438, 'timestamp': '2025-09-30 22:30:01.382941', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.447834', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.11726053804159164, 'timestamp': '2025-09-30 22:30:01.450739', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:01.507556', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.15779057145118713, 'timestamp': '2025-09-30 22:30:01.510155', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:01.566926', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.1461886763572693, 'timestamp': '2025-09-30 22:30:01.575917', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.645213', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.0832914486527443, 'timestamp': '2025-09-30 22:30:01.649855', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:01.707847', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.030638456344604492, 'timestamp': '2025-09-30 22:30:01.710463', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:01.779746', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.057710859924554825, 'timestamp': '2025-09-30 22:30:01.782608', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:01.848601', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.0741989016532898, 'timestamp': '2025-09-30 22:30:01.855079', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:01.912844', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.11821983754634857, 'timestamp': '2025-09-30 22:30:01.915461', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:01.974446', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.06263953447341919, 'timestamp': '2025-09-30 22:30:01.978675', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:02.046414', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.11662060022354126, 'timestamp': '2025-09-30 22:30:02.049717', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.117800', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.1605033427476883, 'timestamp': '2025-09-30 22:30:02.127396', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.195879', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.09291519224643707, 'timestamp': '2025-09-30 22:30:02.201594', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:02.260403', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.11590202897787094, 'timestamp': '2025-09-30 22:30:02.265293', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.326904', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.17864179611206055, 'timestamp': '2025-09-30 22:30:02.331950', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.392595', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.100773885846138, 'timestamp': '2025-09-30 22:30:02.400792', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.459941', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.11348924040794373, 'timestamp': '2025-09-30 22:30:02.462723', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:02.530468', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.11300886422395706, 'timestamp': '2025-09-30 22:30:02.534740', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.594134', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.14233636856079102, 'timestamp': '2025-09-30 22:30:02.597296', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:02.657075', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.10244578123092651, 'timestamp': '2025-09-30 22:30:02.663056', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:02.728749', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.07058671861886978, 'timestamp': '2025-09-30 22:30:02.731281', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:02.788605', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.13875871896743774, 'timestamp': '2025-09-30 22:30:02.792716', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:02.861050', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.1490798145532608, 'timestamp': '2025-09-30 22:30:02.865208', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:02.941991', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.11350701004266739, 'timestamp': '2025-09-30 22:30:02.948823', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:03.012055', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.11356731504201889, 'timestamp': '2025-09-30 22:30:03.014782', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:03.072220', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.11119336634874344, 'timestamp': '2025-09-30 22:30:03.075381', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:03.133993', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.21428713202476501, 'timestamp': '2025-09-30 22:30:03.137501', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:03.194384', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.08996172994375229, 'timestamp': '2025-09-30 22:30:03.202702', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.259575', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.1118423193693161, 'timestamp': '2025-09-30 22:30:03.277213', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:03.335941', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.09159768372774124, 'timestamp': '2025-09-30 22:30:03.340418', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:03.398420', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.11881828308105469, 'timestamp': '2025-09-30 22:30:03.401907', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:03.460172', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.03404972329735756, 'timestamp': '2025-09-30 22:30:03.467197', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:03.523091', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.1531505137681961, 'timestamp': '2025-09-30 22:30:03.532114', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:03.590564', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.06479223072528839, 'timestamp': '2025-09-30 22:30:03.593692', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.651583', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.10372431576251984, 'timestamp': '2025-09-30 22:30:03.654749', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.713386', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.07570259273052216, 'timestamp': '2025-09-30 22:30:03.724843', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.790332', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.1834304928779602, 'timestamp': '2025-09-30 22:30:03.797206', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:03.858724', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.10290253907442093, 'timestamp': '2025-09-30 22:30:03.862042', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.922908', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.11749234795570374, 'timestamp': '2025-09-30 22:30:03.927889', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:03.985747', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.1359221190214157, 'timestamp': '2025-09-30 22:30:03.992354', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:04.052944', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.13955700397491455, 'timestamp': '2025-09-30 22:30:04.056215', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:04.129907', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.18664443492889404, 'timestamp': '2025-09-30 22:30:04.138111', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:04.203983', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.10117417573928833, 'timestamp': '2025-09-30 22:30:04.206601', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:04.264917', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.08292526751756668, 'timestamp': '2025-09-30 22:30:04.277378', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:04.344095', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.044086892157793045, 'timestamp': '2025-09-30 22:30:04.349734', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:04.412872', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.15951651334762573, 'timestamp': '2025-09-30 22:30:04.415459', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:04.476929', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.16161340475082397, 'timestamp': '2025-09-30 22:30:04.479645', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:04.540258', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.0999172106385231, 'timestamp': '2025-09-30 22:30:04.546414', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:04.612893', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.15987516939640045, 'timestamp': '2025-09-30 22:30:04.616561', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:04.724684', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.17450352013111115, 'timestamp': '2025-09-30 22:30:04.733451', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:04.834000', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.10109851509332657, 'timestamp': '2025-09-30 22:30:04.839932', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:04.919086', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.17471212148666382, 'timestamp': '2025-09-30 22:30:04.925609', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.001849', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.08252448588609695, 'timestamp': '2025-09-30 22:30:05.011598', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:05.077127', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.16038553416728973, 'timestamp': '2025-09-30 22:30:05.082197', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:05.149647', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.1612718403339386, 'timestamp': '2025-09-30 22:30:05.152784', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.221067', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.06280884891748428, 'timestamp': '2025-09-30 22:30:05.228915', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:05.286188', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.08283071219921112, 'timestamp': '2025-09-30 22:30:05.289172', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.347611', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.05973229184746742, 'timestamp': '2025-09-30 22:30:05.351898', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:05.410289', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.10227543115615845, 'timestamp': '2025-09-30 22:30:05.418757', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.481081', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.13005474209785461, 'timestamp': '2025-09-30 22:30:05.489390', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:05.546346', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.12158461660146713, 'timestamp': '2025-09-30 22:30:05.549245', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.609529', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.12326110154390335, 'timestamp': '2025-09-30 22:30:05.612497', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:05.672974', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.09950634092092514, 'timestamp': '2025-09-30 22:30:05.678154', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:05.744741', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.0882641151547432, 'timestamp': '2025-09-30 22:30:05.752530', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:05.820364', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.06701279431581497, 'timestamp': '2025-09-30 22:30:05.823499', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:05.880152', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.11809783428907394, 'timestamp': '2025-09-30 22:30:05.883685', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:05.952266', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.13945233821868896, 'timestamp': '2025-09-30 22:30:05.967217', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:06.039612', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.05628804862499237, 'timestamp': '2025-09-30 22:30:06.048600', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:06.112826', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.12013457715511322, 'timestamp': '2025-09-30 22:30:06.122896', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:06.190508', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.1362173855304718, 'timestamp': '2025-09-30 22:30:06.202393', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.280901', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.04583965614438057, 'timestamp': '2025-09-30 22:30:06.292186', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.356324', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.09736651182174683, 'timestamp': '2025-09-30 22:30:06.370910', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:06.428201', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.17356257140636444, 'timestamp': '2025-09-30 22:30:06.431761', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:06.489643', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.10928355157375336, 'timestamp': '2025-09-30 22:30:06.504899', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.571364', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.12386973202228546, 'timestamp': '2025-09-30 22:30:06.574457', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.633404', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.1598977893590927, 'timestamp': '2025-09-30 22:30:06.640831', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.703725', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.10263939201831818, 'timestamp': '2025-09-30 22:30:06.707763', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:06.798619', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.12694938480854034, 'timestamp': '2025-09-30 22:30:06.802382', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:06.875386', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.13352936506271362, 'timestamp': '2025-09-30 22:30:06.881458', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:06.942583', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.038363877683877945, 'timestamp': '2025-09-30 22:30:06.950629', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.012006', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.09439212083816528, 'timestamp': '2025-09-30 22:30:07.018500', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.077085', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.1265651136636734, 'timestamp': '2025-09-30 22:30:07.090219', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.157995', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.12337912619113922, 'timestamp': '2025-09-30 22:30:07.174657', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.248538', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.06327325105667114, 'timestamp': '2025-09-30 22:30:07.257441', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:07.318551', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.06526099890470505, 'timestamp': '2025-09-30 22:30:07.322593', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:07.380612', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.09792262315750122, 'timestamp': '2025-09-30 22:30:07.387026', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:07.453998', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.043240878731012344, 'timestamp': '2025-09-30 22:30:07.459193', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:07.517910', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.11177856475114822, 'timestamp': '2025-09-30 22:30:07.525091', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:07.584445', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.07440830767154694, 'timestamp': '2025-09-30 22:30:07.587395', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:07.645747', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.07616008073091507, 'timestamp': '2025-09-30 22:30:07.649629', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.708840', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.07439908385276794, 'timestamp': '2025-09-30 22:30:07.712926', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:07.775221', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.0671721026301384, 'timestamp': '2025-09-30 22:30:07.782695', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.841998', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.10956703126430511, 'timestamp': '2025-09-30 22:30:07.845342', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:07.903403', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.1434558629989624, 'timestamp': '2025-09-30 22:30:07.915710', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:07.990534', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.14880046248435974, 'timestamp': '2025-09-30 22:30:07.995261', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:08.054479', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.10153549164533615, 'timestamp': '2025-09-30 22:30:08.062007', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:08.121936', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.04355425387620926, 'timestamp': '2025-09-30 22:30:08.124740', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:08.184398', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.10297891497612, 'timestamp': '2025-09-30 22:30:08.192946', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:08.279686', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.17711494863033295, 'timestamp': '2025-09-30 22:30:08.294244', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:08.377016', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.08373398333787918, 'timestamp': '2025-09-30 22:30:08.384122', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:08.444000', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.1247948408126831, 'timestamp': '2025-09-30 22:30:08.448504', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:08.506387', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.06059581786394119, 'timestamp': '2025-09-30 22:30:08.509840', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:08.581941', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.09619747847318649, 'timestamp': '2025-09-30 22:30:08.586567', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:08.655651', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.06914161890745163, 'timestamp': '2025-09-30 22:30:08.663156', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:08.719664', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.06939475238323212, 'timestamp': '2025-09-30 22:30:08.724072', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:08.781642', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.14306087791919708, 'timestamp': '2025-09-30 22:30:08.786557', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:30:23.472261', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 13476.71478534944, 'timestamp': '2025-09-30 22:30:23.481107', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:23.541141', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.08086401969194412, 'timestamp': '2025-09-30 22:30:23.546349', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:23.605756', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.12515144050121307, 'timestamp': '2025-09-30 22:30:23.613419', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:23.672749', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.13082362711429596, 'timestamp': '2025-09-30 22:30:23.675427', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:23.733137', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.16738273203372955, 'timestamp': '2025-09-30 22:30:23.736249', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:23.794128', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.09585260599851608, 'timestamp': '2025-09-30 22:30:23.806710', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:23.864327', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.10947251319885254, 'timestamp': '2025-09-30 22:30:23.870687', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:23.926974', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.11527477204799652, 'timestamp': '2025-09-30 22:30:23.931424', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:23.989441', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.08705804497003555, 'timestamp': '2025-09-30 22:30:24.003126', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.061640', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.10284338146448135, 'timestamp': '2025-09-30 22:30:24.066303', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.126961', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.11144927889108658, 'timestamp': '2025-09-30 22:30:24.134759', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.193024', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.08599196374416351, 'timestamp': '2025-09-30 22:30:24.200638', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.258966', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.23333647847175598, 'timestamp': '2025-09-30 22:30:24.274546', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.343332', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.14232683181762695, 'timestamp': '2025-09-30 22:30:24.356590', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:24.424207', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.2022761106491089, 'timestamp': '2025-09-30 22:30:24.441948', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.499995', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.09633591771125793, 'timestamp': '2025-09-30 22:30:24.505796', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.566658', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.12889111042022705, 'timestamp': '2025-09-30 22:30:24.574126', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.631927', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.08944886922836304, 'timestamp': '2025-09-30 22:30:24.636644', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.696075', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.09173540771007538, 'timestamp': '2025-09-30 22:30:24.703712', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.775768', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.18204250931739807, 'timestamp': '2025-09-30 22:30:24.788367', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:24.846352', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.15644748508930206, 'timestamp': '2025-09-30 22:30:24.849761', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.908786', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.06512481719255447, 'timestamp': '2025-09-30 22:30:24.912271', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:24.980687', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.2468971461057663, 'timestamp': '2025-09-30 22:30:24.991660', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:25.052287', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.0769590511918068, 'timestamp': '2025-09-30 22:30:25.056793', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.118239', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.07195758819580078, 'timestamp': '2025-09-30 22:30:25.122092', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:25.180471', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.14260463416576385, 'timestamp': '2025-09-30 22:30:25.184381', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.252481', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.1377228945493698, 'timestamp': '2025-09-30 22:30:25.260422', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:25.336389', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.13334883749485016, 'timestamp': '2025-09-30 22:30:25.342121', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.400983', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.15320613980293274, 'timestamp': '2025-09-30 22:30:25.404770', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:25.479051', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.14386694133281708, 'timestamp': '2025-09-30 22:30:25.500179', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:25.558300', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.10846388339996338, 'timestamp': '2025-09-30 22:30:25.566411', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.626406', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.06836627423763275, 'timestamp': '2025-09-30 22:30:25.631785', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:25.690547', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.08751020580530167, 'timestamp': '2025-09-30 22:30:25.694541', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.754995', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.2082715779542923, 'timestamp': '2025-09-30 22:30:25.759565', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.820937', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.16243457794189453, 'timestamp': '2025-09-30 22:30:25.829525', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:25.887150', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.10184171795845032, 'timestamp': '2025-09-30 22:30:25.903223', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:25.961417', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.1735546588897705, 'timestamp': '2025-09-30 22:30:25.966801', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.027902', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.20198331773281097, 'timestamp': '2025-09-30 22:30:26.034143', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:26.095110', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.14165882766246796, 'timestamp': '2025-09-30 22:30:26.103494', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:26.176102', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.04884372651576996, 'timestamp': '2025-09-30 22:30:26.179618', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:26.245166', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.08876904845237732, 'timestamp': '2025-09-30 22:30:26.248681', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:26.307782', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.08830075711011887, 'timestamp': '2025-09-30 22:30:26.319160', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:26.378520', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.11769428849220276, 'timestamp': '2025-09-30 22:30:26.391978', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.457945', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.16094212234020233, 'timestamp': '2025-09-30 22:30:26.461153', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:26.520022', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.18124505877494812, 'timestamp': '2025-09-30 22:30:26.523546', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:26.581290', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.20438215136528015, 'timestamp': '2025-09-30 22:30:26.583872', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.641377', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.18377918004989624, 'timestamp': '2025-09-30 22:30:26.650055', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.708316', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.11258067190647125, 'timestamp': '2025-09-30 22:30:26.711728', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.769169', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.11229769885540009, 'timestamp': '2025-09-30 22:30:26.777896', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.839837', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.08855465054512024, 'timestamp': '2025-09-30 22:30:26.843281', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:26.900642', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.029547953978180885, 'timestamp': '2025-09-30 22:30:26.907463', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:26.965510', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.06715597957372665, 'timestamp': '2025-09-30 22:30:26.968754', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:27.026970', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.08966577798128128, 'timestamp': '2025-09-30 22:30:27.030801', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:27.088184', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.17226913571357727, 'timestamp': '2025-09-30 22:30:27.092768', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.151559', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.1323905885219574, 'timestamp': '2025-09-30 22:30:27.160087', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.216744', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.08660127222537994, 'timestamp': '2025-09-30 22:30:27.221279', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.285513', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.13234353065490723, 'timestamp': '2025-09-30 22:30:27.290822', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.352582', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.21723419427871704, 'timestamp': '2025-09-30 22:30:27.366598', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:27.425132', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.14132270216941833, 'timestamp': '2025-09-30 22:30:27.432287', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.490647', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.23246465623378754, 'timestamp': '2025-09-30 22:30:27.494163', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:27.551827', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.22570101916790009, 'timestamp': '2025-09-30 22:30:27.566115', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:27.624994', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.12848366796970367, 'timestamp': '2025-09-30 22:30:27.629084', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:27.690765', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.07095641642808914, 'timestamp': '2025-09-30 22:30:27.698242', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:27.766297', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.09319645166397095, 'timestamp': '2025-09-30 22:30:27.769873', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:27.838672', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.15541507303714752, 'timestamp': '2025-09-30 22:30:27.842795', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:27.901472', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.1281208097934723, 'timestamp': '2025-09-30 22:30:27.904530', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:30:27.963953', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.09128585457801819, 'timestamp': '2025-09-30 22:30:27.973330', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:28.033420', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.1789552867412567, 'timestamp': '2025-09-30 22:30:28.036998', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:28.116846', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.23587016761302948, 'timestamp': '2025-09-30 22:30:28.121075', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:28.179579', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.10990332812070847, 'timestamp': '2025-09-30 22:30:28.183499', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:28.242256', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.1041938066482544, 'timestamp': '2025-09-30 22:30:28.251060', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:28.310070', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.1706903725862503, 'timestamp': '2025-09-30 22:30:28.324894', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:28.390143', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.12427264451980591, 'timestamp': '2025-09-30 22:30:28.393782', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:28.452717', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.13083556294441223, 'timestamp': '2025-09-30 22:30:28.467287', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:28.525287', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.07305610179901123, 'timestamp': '2025-09-30 22:30:28.532801', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:28.589028', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.08523036539554596, 'timestamp': '2025-09-30 22:30:28.605164', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:28.676625', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.20653246343135834, 'timestamp': '2025-09-30 22:30:28.680963', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:28.755286', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.07635463029146194, 'timestamp': '2025-09-30 22:30:28.760138', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:28.821680', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.15795931220054626, 'timestamp': '2025-09-30 22:30:28.840459', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:28.908302', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.1759602576494217, 'timestamp': '2025-09-30 22:30:28.924533', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:28.995379', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.07471466809511185, 'timestamp': '2025-09-30 22:30:28.999380', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:29.070259', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.09821044653654099, 'timestamp': '2025-09-30 22:30:29.074335', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:29.142784', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.12308494746685028, 'timestamp': '2025-09-30 22:30:29.150760', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:29.225410', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.12353918701410294, 'timestamp': '2025-09-30 22:30:29.229683', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:29.289408', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.19098657369613647, 'timestamp': '2025-09-30 22:30:29.294541', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:29.354839', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.11452766507863998, 'timestamp': '2025-09-30 22:30:29.360405', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:29.420432', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.11804081499576569, 'timestamp': '2025-09-30 22:30:29.427743', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:29.487772', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.08534811437129974, 'timestamp': '2025-09-30 22:30:29.492421', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:29.554459', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.05054134130477905, 'timestamp': '2025-09-30 22:30:29.558657', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:29.619603', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.14171601831912994, 'timestamp': '2025-09-30 22:30:29.625787', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:29.685350', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.19558179378509521, 'timestamp': '2025-09-30 22:30:29.705116', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:29.769064', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.09998959302902222, 'timestamp': '2025-09-30 22:30:29.773752', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:29.845609', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.06358825415372849, 'timestamp': '2025-09-30 22:30:29.850401', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:29.928436', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.10679741948843002, 'timestamp': '2025-09-30 22:30:29.931883', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:30.002949', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.15253855288028717, 'timestamp': '2025-09-30 22:30:30.011818', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:30.087839', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.13284213840961456, 'timestamp': '2025-09-30 22:30:30.102253', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:30.187762', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.08979663252830505, 'timestamp': '2025-09-30 22:30:30.193496', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:30.266016', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.15143883228302002, 'timestamp': '2025-09-30 22:30:30.270267', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:30.344952', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.12214408069849014, 'timestamp': '2025-09-30 22:30:30.365138', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:30.436424', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.08678333461284637, 'timestamp': '2025-09-30 22:30:30.454408', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:30.563956', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.22785450518131256, 'timestamp': '2025-09-30 22:30:30.572496', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:30.654567', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.07659348845481873, 'timestamp': '2025-09-30 22:30:30.665631', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:30.754861', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.07646079361438751, 'timestamp': '2025-09-30 22:30:30.763381', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:30.854869', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.2182772010564804, 'timestamp': '2025-09-30 22:30:30.862878', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:30.965396', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.09127147495746613, 'timestamp': '2025-09-30 22:30:30.969283', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:31.038577', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.0863061398267746, 'timestamp': '2025-09-30 22:30:31.042256', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:31.103435', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.13378441333770752, 'timestamp': '2025-09-30 22:30:31.110704', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.188471', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.15121795237064362, 'timestamp': '2025-09-30 22:30:31.193542', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:31.275865', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.12732666730880737, 'timestamp': '2025-09-30 22:30:31.280973', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.355832', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.20973312854766846, 'timestamp': '2025-09-30 22:30:31.359108', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.417951', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.15467576682567596, 'timestamp': '2025-09-30 22:30:31.426745', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:30:31.484505', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.15951423346996307, 'timestamp': '2025-09-30 22:30:31.499497', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:31.559887', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.08295618742704391, 'timestamp': '2025-09-30 22:30:31.564105', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.625393', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.12499603629112244, 'timestamp': '2025-09-30 22:30:31.631827', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.692013', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.10556458681821823, 'timestamp': '2025-09-30 22:30:31.699909', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:31.759663', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.15178799629211426, 'timestamp': '2025-09-30 22:30:31.762837', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:31.820646', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.05143438279628754, 'timestamp': '2025-09-30 22:30:31.823884', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:31.883702', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.08502757549285889, 'timestamp': '2025-09-30 22:30:31.886844', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:31.946121', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.1535840630531311, 'timestamp': '2025-09-30 22:30:31.952850', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.013811', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.11608456820249557, 'timestamp': '2025-09-30 22:30:32.017284', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.079072', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.15070903301239014, 'timestamp': '2025-09-30 22:30:32.083613', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.150562', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.034891899675130844, 'timestamp': '2025-09-30 22:30:32.155620', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.214609', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.12328175455331802, 'timestamp': '2025-09-30 22:30:32.231813', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.300575', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.09299031645059586, 'timestamp': '2025-09-30 22:30:32.305493', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.363142', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.1636260747909546, 'timestamp': '2025-09-30 22:30:32.366351', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:32.427377', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.18663902580738068, 'timestamp': '2025-09-30 22:30:32.431155', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.488610', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.09874629229307175, 'timestamp': '2025-09-30 22:30:32.497222', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:32.556414', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.2205398976802826, 'timestamp': '2025-09-30 22:30:32.560824', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:32.619856', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.07102282345294952, 'timestamp': '2025-09-30 22:30:32.624623', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.684329', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.10896627604961395, 'timestamp': '2025-09-30 22:30:32.689503', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.748284', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.16250155866146088, 'timestamp': '2025-09-30 22:30:32.755700', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.826532', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.10562051832675934, 'timestamp': '2025-09-30 22:30:32.831381', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:32.892081', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.15088555216789246, 'timestamp': '2025-09-30 22:30:32.897140', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:32.957386', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.15909215807914734, 'timestamp': '2025-09-30 22:30:32.962517', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:33.020768', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.1501004993915558, 'timestamp': '2025-09-30 22:30:33.028727', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:33.087594', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.1062445268034935, 'timestamp': '2025-09-30 22:30:33.090567', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.163754', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.10832127928733826, 'timestamp': '2025-09-30 22:30:33.167362', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:33.226622', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.19987677037715912, 'timestamp': '2025-09-30 22:30:33.230035', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.303583', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.12205109000205994, 'timestamp': '2025-09-30 22:30:33.310191', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.367359', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.1289380043745041, 'timestamp': '2025-09-30 22:30:33.371974', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:33.433306', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.2023414969444275, 'timestamp': '2025-09-30 22:30:33.447738', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:33.525583', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.16684891283512115, 'timestamp': '2025-09-30 22:30:33.538243', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.597757', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.07345519959926605, 'timestamp': '2025-09-30 22:30:33.616993', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.676059', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.11520519852638245, 'timestamp': '2025-09-30 22:30:33.680831', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:33.742884', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.05291270092129707, 'timestamp': '2025-09-30 22:30:33.748938', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:33.811662', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.09749636054039001, 'timestamp': '2025-09-30 22:30:33.816425', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:33.884948', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.15766467154026031, 'timestamp': '2025-09-30 22:30:33.892913', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:33.957076', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.08907090872526169, 'timestamp': '2025-09-30 22:30:33.963196', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.027779', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.1353946477174759, 'timestamp': '2025-09-30 22:30:34.043018', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.112226', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.14164705574512482, 'timestamp': '2025-09-30 22:30:34.115922', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.174613', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.09234268963336945, 'timestamp': '2025-09-30 22:30:34.182553', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:34.253933', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.11014311015605927, 'timestamp': '2025-09-30 22:30:34.257640', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:34.317052', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.14341001212596893, 'timestamp': '2025-09-30 22:30:34.322201', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.383545', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.11361895501613617, 'timestamp': '2025-09-30 22:30:34.386861', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:34.459270', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.1545771360397339, 'timestamp': '2025-09-30 22:30:34.475982', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.535059', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.06786955147981644, 'timestamp': '2025-09-30 22:30:34.539579', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:34.598280', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.15520527958869934, 'timestamp': '2025-09-30 22:30:34.602004', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:34.671128', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.13671129941940308, 'timestamp': '2025-09-30 22:30:34.676376', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:34.745880', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.14235873520374298, 'timestamp': '2025-09-30 22:30:34.753260', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:34.815349', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.1119987741112709, 'timestamp': '2025-09-30 22:30:34.819902', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:34.889306', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.19310879707336426, 'timestamp': '2025-09-30 22:30:34.894235', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:34.964694', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.10734646767377853, 'timestamp': '2025-09-30 22:30:34.967394', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:35.026535', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.13451656699180603, 'timestamp': '2025-09-30 22:30:35.034508', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:35.098775', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.09995418787002563, 'timestamp': '2025-09-30 22:30:35.102037', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:35.168064', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.13070833683013916, 'timestamp': '2025-09-30 22:30:35.172104', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:35.245962', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.18303972482681274, 'timestamp': '2025-09-30 22:30:35.251361', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:35.309662', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.04723391681909561, 'timestamp': '2025-09-30 22:30:35.316292', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:35.374478', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.20248889923095703, 'timestamp': '2025-09-30 22:30:35.376896', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:35.438377', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.15674547851085663, 'timestamp': '2025-09-30 22:30:35.442200', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:35.500329', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.1948387622833252, 'timestamp': '2025-09-30 22:30:35.504575', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:35.568115', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.03320347145199776, 'timestamp': '2025-09-30 22:30:35.576437', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:35.635547', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.04696757718920708, 'timestamp': '2025-09-30 22:30:35.640573', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:35.711127', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.17449408769607544, 'timestamp': '2025-09-30 22:30:35.715525', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:35.774976', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.07978685945272446, 'timestamp': '2025-09-30 22:30:35.782070', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:35.843104', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.11516385525465012, 'timestamp': '2025-09-30 22:30:35.851728', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:35.911019', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.13087816536426544, 'timestamp': '2025-09-30 22:30:35.921648', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:35.987810', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.12973929941654205, 'timestamp': '2025-09-30 22:30:35.991529', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:36.048369', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.1151726096868515, 'timestamp': '2025-09-30 22:30:36.050565', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:36.111687', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.19996991753578186, 'timestamp': '2025-09-30 22:30:36.122819', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:36.180634', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.09361465275287628, 'timestamp': '2025-09-30 22:30:36.190738', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:36.263620', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.25833404064178467, 'timestamp': '2025-09-30 22:30:36.267818', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:36.333440', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.14326748251914978, 'timestamp': '2025-09-30 22:30:36.350862', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:36.425404', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.18866001069545746, 'timestamp': '2025-09-30 22:30:36.443704', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:36.509291', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.10998225957155228, 'timestamp': '2025-09-30 22:30:36.512139', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:36.570944', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.22176697850227356, 'timestamp': '2025-09-30 22:30:36.573277', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:36.636263', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.2029571533203125, 'timestamp': '2025-09-30 22:30:36.641432', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:36.699901', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.11303906887769699, 'timestamp': '2025-09-30 22:30:36.716367', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:36.778253', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.0698719248175621, 'timestamp': '2025-09-30 22:30:36.782426', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:36.840538', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.07984668761491776, 'timestamp': '2025-09-30 22:30:36.843752', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:36.909471', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.17633265256881714, 'timestamp': '2025-09-30 22:30:36.926598', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:36.996459', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.11924798041582108, 'timestamp': '2025-09-30 22:30:37.003859', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:37.062887', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.21057996153831482, 'timestamp': '2025-09-30 22:30:37.066410', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.125124', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.19669008255004883, 'timestamp': '2025-09-30 22:30:37.130161', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.191197', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.10916130989789963, 'timestamp': '2025-09-30 22:30:37.203124', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.267553', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.04460174962878227, 'timestamp': '2025-09-30 22:30:37.274324', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:37.349122', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.16309821605682373, 'timestamp': '2025-09-30 22:30:37.353241', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.423957', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.14696507155895233, 'timestamp': '2025-09-30 22:30:37.427232', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:37.485134', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.1532212197780609, 'timestamp': '2025-09-30 22:30:37.495193', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:37.552611', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.03280913829803467, 'timestamp': '2025-09-30 22:30:37.559810', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.623819', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.14019450545310974, 'timestamp': '2025-09-30 22:30:37.628653', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:37.687638', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.0756072923541069, 'timestamp': '2025-09-30 22:30:37.691748', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:37.750218', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.1120292991399765, 'timestamp': '2025-09-30 22:30:37.753655', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:37.825389', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.1344933658838272, 'timestamp': '2025-09-30 22:30:37.841925', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:37.906639', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.06438621878623962, 'timestamp': '2025-09-30 22:30:37.909757', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:37.977546', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.12603217363357544, 'timestamp': '2025-09-30 22:30:37.980419', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.042755', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.11684694141149521, 'timestamp': '2025-09-30 22:30:38.047730', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.106679', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.19486451148986816, 'timestamp': '2025-09-30 22:30:38.114736', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:38.181243', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.12476207315921783, 'timestamp': '2025-09-30 22:30:38.184627', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.242147', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.12005382031202316, 'timestamp': '2025-09-30 22:30:38.244984', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:38.303269', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.12317115068435669, 'timestamp': '2025-09-30 22:30:38.307079', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.368752', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.1556033045053482, 'timestamp': '2025-09-30 22:30:38.376416', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.433631', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.11286848038434982, 'timestamp': '2025-09-30 22:30:38.437300', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:38.496696', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.22137431800365448, 'timestamp': '2025-09-30 22:30:38.501062', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:38.565606', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.12422515451908112, 'timestamp': '2025-09-30 22:30:38.569455', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.630678', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.11793066561222076, 'timestamp': '2025-09-30 22:30:38.639532', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:38.698696', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.0601009875535965, 'timestamp': '2025-09-30 22:30:38.702804', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:38.759968', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.1460576057434082, 'timestamp': '2025-09-30 22:30:38.762853', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:38.822520', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.06544345617294312, 'timestamp': '2025-09-30 22:30:38.825633', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:38.884587', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.1374431997537613, 'timestamp': '2025-09-30 22:30:38.892074', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:38.956855', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.0965239554643631, 'timestamp': '2025-09-30 22:30:38.960293', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:39.018505', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.08946516364812851, 'timestamp': '2025-09-30 22:30:39.034165', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:39.095883', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.18032127618789673, 'timestamp': '2025-09-30 22:30:39.101287', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:39.163525', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.10449431836605072, 'timestamp': '2025-09-30 22:30:39.171074', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:39.230642', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.14680901169776917, 'timestamp': '2025-09-30 22:30:39.233082', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.299131', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.16342595219612122, 'timestamp': '2025-09-30 22:30:39.309445', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:39.368465', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.035940442234277725, 'timestamp': '2025-09-30 22:30:39.371292', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.429061', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.12999805808067322, 'timestamp': '2025-09-30 22:30:39.436133', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.496377', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.10169830918312073, 'timestamp': '2025-09-30 22:30:39.504065', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.562525', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.12257525324821472, 'timestamp': '2025-09-30 22:30:39.566928', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:39.627167', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.104786716401577, 'timestamp': '2025-09-30 22:30:39.630985', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.689710', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.1579175442457199, 'timestamp': '2025-09-30 22:30:39.697725', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.770458', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.15471653640270233, 'timestamp': '2025-09-30 22:30:39.775851', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:39.833822', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.184633269906044, 'timestamp': '2025-09-30 22:30:39.837368', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:39.909547', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.16076605021953583, 'timestamp': '2025-09-30 22:30:39.920824', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:39.987325', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.12678970396518707, 'timestamp': '2025-09-30 22:30:39.999589', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.061789', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.14618004858493805, 'timestamp': '2025-09-30 22:30:40.064237', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:40.122118', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.1813548058271408, 'timestamp': '2025-09-30 22:30:40.125649', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:40.185945', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.1419229954481125, 'timestamp': '2025-09-30 22:30:40.190056', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:40.250086', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.06804397702217102, 'timestamp': '2025-09-30 22:30:40.257797', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.316653', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.16226056218147278, 'timestamp': '2025-09-30 22:30:40.320293', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.379060', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.12689581513404846, 'timestamp': '2025-09-30 22:30:40.382333', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:40.464471', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.1504780650138855, 'timestamp': '2025-09-30 22:30:40.467790', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.537964', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.22103679180145264, 'timestamp': '2025-09-30 22:30:40.549104', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:40.612426', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.14604735374450684, 'timestamp': '2025-09-30 22:30:40.615333', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.673418', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.09857923537492752, 'timestamp': '2025-09-30 22:30:40.676800', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:40.743592', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.23514917492866516, 'timestamp': '2025-09-30 22:30:40.746269', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.817721', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.052701063454151154, 'timestamp': '2025-09-30 22:30:40.826329', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.882501', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.18601009249687195, 'timestamp': '2025-09-30 22:30:40.885279', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:40.943309', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.09435015916824341, 'timestamp': '2025-09-30 22:30:40.946326', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:41.005503', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.12976273894309998, 'timestamp': '2025-09-30 22:30:41.009327', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.072619', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.11219676584005356, 'timestamp': '2025-09-30 22:30:41.079101', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:41.135923', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.25572669506073, 'timestamp': '2025-09-30 22:30:41.138758', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.196248', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.14567717909812927, 'timestamp': '2025-09-30 22:30:41.199333', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.257126', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.12976841628551483, 'timestamp': '2025-09-30 22:30:41.259809', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.317589', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.19152423739433289, 'timestamp': '2025-09-30 22:30:41.327699', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:41.390366', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.12923617660999298, 'timestamp': '2025-09-30 22:30:41.393786', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.450954', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.09675166010856628, 'timestamp': '2025-09-30 22:30:41.455743', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.530658', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.08618994057178497, 'timestamp': '2025-09-30 22:30:41.534595', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:41.611469', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.14052604138851166, 'timestamp': '2025-09-30 22:30:41.618535', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:41.687620', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.0667857825756073, 'timestamp': '2025-09-30 22:30:41.690801', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:41.752546', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.2035595327615738, 'timestamp': '2025-09-30 22:30:41.762747', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:41.824685', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.14832574129104614, 'timestamp': '2025-09-30 22:30:41.834755', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:41.899444', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.10589247941970825, 'timestamp': '2025-09-30 22:30:41.907925', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:41.970628', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.1142054870724678, 'timestamp': '2025-09-30 22:30:41.973793', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.032883', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.1261066049337387, 'timestamp': '2025-09-30 22:30:42.035831', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:42.103269', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.2353096604347229, 'timestamp': '2025-09-30 22:30:42.109398', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.169666', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.09006346017122269, 'timestamp': '2025-09-30 22:30:42.176012', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.234720', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.0723029375076294, 'timestamp': '2025-09-30 22:30:42.240103', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:42.300052', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.09973727911710739, 'timestamp': '2025-09-30 22:30:42.302949', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:42.359649', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.0821574404835701, 'timestamp': '2025-09-30 22:30:42.361956', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.421693', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.1476140022277832, 'timestamp': '2025-09-30 22:30:42.428756', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.485121', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.13577675819396973, 'timestamp': '2025-09-30 22:30:42.492465', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.552680', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.07266844063997269, 'timestamp': '2025-09-30 22:30:42.555967', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.615223', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.15923583507537842, 'timestamp': '2025-09-30 22:30:42.625421', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.692640', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.16568206250667572, 'timestamp': '2025-09-30 22:30:42.706982', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.773127', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.07854437828063965, 'timestamp': '2025-09-30 22:30:42.776652', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:42.833374', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.14865027368068695, 'timestamp': '2025-09-30 22:30:42.838292', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:42.898054', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.14817480742931366, 'timestamp': '2025-09-30 22:30:42.903216', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:30:42.964195', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.11201787739992142, 'timestamp': '2025-09-30 22:30:42.978374', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:43.038916', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.05699072405695915, 'timestamp': '2025-09-30 22:30:43.042143', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:43.100428', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.1041732057929039, 'timestamp': '2025-09-30 22:30:43.104564', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:43.162261', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.1550750881433487, 'timestamp': '2025-09-30 22:30:43.168177', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:43.227815', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.10748616605997086, 'timestamp': '2025-09-30 22:30:43.238353', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.295084', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.18715010583400726, 'timestamp': '2025-09-30 22:30:43.298536', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:43.358513', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.05685323476791382, 'timestamp': '2025-09-30 22:30:43.365785', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:43.429850', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.14525741338729858, 'timestamp': '2025-09-30 22:30:43.433317', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:43.492941', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.1320028007030487, 'timestamp': '2025-09-30 22:30:43.509201', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.577940', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.1337566375732422, 'timestamp': '2025-09-30 22:30:43.580444', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.638645', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.07396583259105682, 'timestamp': '2025-09-30 22:30:43.644472', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:43.702080', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.06421706080436707, 'timestamp': '2025-09-30 22:30:43.705119', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:43.764642', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.16107359528541565, 'timestamp': '2025-09-30 22:30:43.775787', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.834949', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.12798798084259033, 'timestamp': '2025-09-30 22:30:43.837827', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.901084', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.11261311918497086, 'timestamp': '2025-09-30 22:30:43.904556', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:43.970481', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.09240676462650299, 'timestamp': '2025-09-30 22:30:43.973065', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:44.036813', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.1342797875404358, 'timestamp': '2025-09-30 22:30:44.043699', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.116818', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.04449433460831642, 'timestamp': '2025-09-30 22:30:44.123912', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.187550', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.14964933693408966, 'timestamp': '2025-09-30 22:30:44.189924', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:44.251244', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.15075470507144928, 'timestamp': '2025-09-30 22:30:44.257192', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:44.317233', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.08460649847984314, 'timestamp': '2025-09-30 22:30:44.323569', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.386627', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.08151878416538239, 'timestamp': '2025-09-30 22:30:44.391243', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:44.452179', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.09016440063714981, 'timestamp': '2025-09-30 22:30:44.455808', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.513638', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.11485642939805984, 'timestamp': '2025-09-30 22:30:44.516497', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.579593', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.08165427297353745, 'timestamp': '2025-09-30 22:30:44.591619', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:44.650072', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.1206449493765831, 'timestamp': '2025-09-30 22:30:44.657626', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:44.721057', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.05650277063250542, 'timestamp': '2025-09-30 22:30:44.727979', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:44.785601', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.0833720937371254, 'timestamp': '2025-09-30 22:30:44.793662', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.858123', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.11030828207731247, 'timestamp': '2025-09-30 22:30:44.868660', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:44.926385', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.23098120093345642, 'timestamp': '2025-09-30 22:30:44.929103', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:44.992047', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.13067691028118134, 'timestamp': '2025-09-30 22:30:44.997613', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.056996', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.18091990053653717, 'timestamp': '2025-09-30 22:30:45.061989', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:45.122199', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.08180048316717148, 'timestamp': '2025-09-30 22:30:45.129772', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:45.204831', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.13710017502307892, 'timestamp': '2025-09-30 22:30:45.207743', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:45.266080', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.061090096831321716, 'timestamp': '2025-09-30 22:30:45.269366', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.330798', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.19776466488838196, 'timestamp': '2025-09-30 22:30:45.333181', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:45.392475', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.13681194186210632, 'timestamp': '2025-09-30 22:30:45.405498', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.464347', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.16461804509162903, 'timestamp': '2025-09-30 22:30:45.467508', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.527369', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.0837627425789833, 'timestamp': '2025-09-30 22:30:45.535728', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.594710', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.20143014192581177, 'timestamp': '2025-09-30 22:30:45.597325', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.659982', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.15246479213237762, 'timestamp': '2025-09-30 22:30:45.666610', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:45.723795', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.12397799640893936, 'timestamp': '2025-09-30 22:30:45.726471', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:45.786237', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.16947712004184723, 'timestamp': '2025-09-30 22:30:45.790783', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:45.850968', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.12465453892946243, 'timestamp': '2025-09-30 22:30:45.853139', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:45.911662', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.13925008475780487, 'timestamp': '2025-09-30 22:30:45.917897', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:45.974647', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.15571998059749603, 'timestamp': '2025-09-30 22:30:45.977235', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.034871', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.19969406723976135, 'timestamp': '2025-09-30 22:30:46.037402', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.095555', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.12324491888284683, 'timestamp': '2025-09-30 22:30:46.099297', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.163125', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.11484737694263458, 'timestamp': '2025-09-30 22:30:46.169690', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:46.233403', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.1059010773897171, 'timestamp': '2025-09-30 22:30:46.236971', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.299776', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.15668094158172607, 'timestamp': '2025-09-30 22:30:46.312352', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.369750', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.1281319111585617, 'timestamp': '2025-09-30 22:30:46.373339', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:46.437160', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.12826576828956604, 'timestamp': '2025-09-30 22:30:46.449688', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:46.511747', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.12592682242393494, 'timestamp': '2025-09-30 22:30:46.521019', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:46.587092', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.16411101818084717, 'timestamp': '2025-09-30 22:30:46.593663', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:46.672015', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.097593754529953, 'timestamp': '2025-09-30 22:30:46.675374', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:46.733854', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.09160943329334259, 'timestamp': '2025-09-30 22:30:46.740447', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:46.802640', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.07177253067493439, 'timestamp': '2025-09-30 22:30:46.807140', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:46.868581', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.13094545900821686, 'timestamp': '2025-09-30 22:30:46.874431', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:46.937304', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.12358115613460541, 'timestamp': '2025-09-30 22:30:46.946966', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:47.009828', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.06905949115753174, 'timestamp': '2025-09-30 22:30:47.018059', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:47.078864', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.12914754450321198, 'timestamp': '2025-09-30 22:30:47.081792', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:47.153530', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.10611142963171005, 'timestamp': '2025-09-30 22:30:47.159439', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:47.222012', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.1390468180179596, 'timestamp': '2025-09-30 22:30:47.226967', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:47.288535', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.14382991194725037, 'timestamp': '2025-09-30 22:30:47.295849', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:47.354524', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.06418181955814362, 'timestamp': '2025-09-30 22:30:47.358873', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:47.431556', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.27436503767967224, 'timestamp': '2025-09-30 22:30:47.435277', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:47.498897', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.12223664671182632, 'timestamp': '2025-09-30 22:30:47.501681', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:47.564833', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.15014706552028656, 'timestamp': '2025-09-30 22:30:47.571276', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:47.647388', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.10369483381509781, 'timestamp': '2025-09-30 22:30:47.650147', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:47.711296', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.17262886464595795, 'timestamp': '2025-09-30 22:30:47.713629', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:47.771996', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.09288443624973297, 'timestamp': '2025-09-30 22:30:47.781792', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:47.847808', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.1315566450357437, 'timestamp': '2025-09-30 22:30:47.854302', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-09-30 22:30:48.290015', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:48.352231', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.16219091415405273, 'timestamp': '2025-09-30 22:30:48.358120', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:30:48.418240', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.07561769336462021, 'timestamp': '2025-09-30 22:30:48.426635', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.488365', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.1993829607963562, 'timestamp': '2025-09-30 22:30:48.491438', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.555646', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.03944260999560356, 'timestamp': '2025-09-30 22:30:48.562933', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.637984', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.19570675492286682, 'timestamp': '2025-09-30 22:30:48.641970', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.700586', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.06629886478185654, 'timestamp': '2025-09-30 22:30:48.705714', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.776681', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.10170985758304596, 'timestamp': '2025-09-30 22:30:48.780090', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:48.858072', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.1501360684633255, 'timestamp': '2025-09-30 22:30:48.864838', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:48.922445', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.10739655047655106, 'timestamp': '2025-09-30 22:30:48.925530', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:48.991543', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.16406722366809845, 'timestamp': '2025-09-30 22:30:48.996336', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:49.055763', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.04427844658493996, 'timestamp': '2025-09-30 22:30:49.062352', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:49.120594', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.09580580145120621, 'timestamp': '2025-09-30 22:30:49.129057', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:49.191677', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.10218030214309692, 'timestamp': '2025-09-30 22:30:49.194774', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:49.254824', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.10293639451265335, 'timestamp': '2025-09-30 22:30:49.257555', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:49.318409', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.1331968605518341, 'timestamp': '2025-09-30 22:30:49.322301', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:49.387531', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.05925460904836655, 'timestamp': '2025-09-30 22:30:49.395004', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:49.454552', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.12088527530431747, 'timestamp': '2025-09-30 22:30:49.457693', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:49.521115', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.0849735364317894, 'timestamp': '2025-09-30 22:30:49.523866', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:49.584301', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.07103944569826126, 'timestamp': '2025-09-30 22:30:49.593443', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:49.657949', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.08568058162927628, 'timestamp': '2025-09-30 22:30:49.663905', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:49.722606', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.08796219527721405, 'timestamp': '2025-09-30 22:30:49.726899', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:49.794612', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.12035088241100311, 'timestamp': '2025-09-30 22:30:49.801142', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:49.861441', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.11462175101041794, 'timestamp': '2025-09-30 22:30:49.863558', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:49.933671', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.13661937415599823, 'timestamp': '2025-09-30 22:30:49.941878', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.001425', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.1097528412938118, 'timestamp': '2025-09-30 22:30:50.004498', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:50.064177', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06826991587877274, 'timestamp': '2025-09-30 22:30:50.071436', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.136757', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.15146182477474213, 'timestamp': '2025-09-30 22:30:50.142373', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.199561', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.07761332392692566, 'timestamp': '2025-09-30 22:30:50.209816', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.277046', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.15165506303310394, 'timestamp': '2025-09-30 22:30:50.280542', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:50.340275', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.17543409764766693, 'timestamp': '2025-09-30 22:30:50.343709', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.411541', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.07812779396772385, 'timestamp': '2025-09-30 22:30:50.415001', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:50.478187', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.12294274568557739, 'timestamp': '2025-09-30 22:30:50.485496', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:50.547157', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.1575167328119278, 'timestamp': '2025-09-30 22:30:50.550573', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:50.617154', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.06241631507873535, 'timestamp': '2025-09-30 22:30:50.620135', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.686543', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.10003623366355896, 'timestamp': '2025-09-30 22:30:50.694868', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:50.754169', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.09212367981672287, 'timestamp': '2025-09-30 22:30:50.771242', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:50.830294', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.10584168881177902, 'timestamp': '2025-09-30 22:30:50.835697', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:50.897349', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.11166784912347794, 'timestamp': '2025-09-30 22:30:50.906234', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:50.974007', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.035451699048280716, 'timestamp': '2025-09-30 22:30:50.981318', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:51.038748', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.06588412821292877, 'timestamp': '2025-09-30 22:30:51.045906', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:51.103856', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.11080518364906311, 'timestamp': '2025-09-30 22:30:51.106818', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:51.165559', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.12414664775133133, 'timestamp': '2025-09-30 22:30:51.177357', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:51.237963', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.2262234389781952, 'timestamp': '2025-09-30 22:30:51.240993', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:51.299405', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.0660051554441452, 'timestamp': '2025-09-30 22:30:51.306172', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:51.364706', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.17351341247558594, 'timestamp': '2025-09-30 22:30:51.367915', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:51.436556', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.07635041326284409, 'timestamp': '2025-09-30 22:30:51.443840', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:51.507194', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.08486822992563248, 'timestamp': '2025-09-30 22:30:51.512519', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:51.574442', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.15310510993003845, 'timestamp': '2025-09-30 22:30:51.581008', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:51.642857', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.11273898184299469, 'timestamp': '2025-09-30 22:30:51.645870', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:51.707590', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.09524210542440414, 'timestamp': '2025-09-30 22:30:51.711339', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:51.770838', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.09507361054420471, 'timestamp': '2025-09-30 22:30:51.776162', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:30:51.835084', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.13021987676620483, 'timestamp': '2025-09-30 22:30:51.842048', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:51.900281', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.13000960648059845, 'timestamp': '2025-09-30 22:30:51.903763', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:51.965908', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.20799104869365692, 'timestamp': '2025-09-30 22:30:51.972354', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:52.037483', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.15756653249263763, 'timestamp': '2025-09-30 22:30:52.041626', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:52.098757', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.08991272747516632, 'timestamp': '2025-09-30 22:30:52.105701', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:52.161925', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.11662607640028, 'timestamp': '2025-09-30 22:30:52.165352', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:52.226438', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.07356236129999161, 'timestamp': '2025-09-30 22:30:52.229101', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:52.286004', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.11473298072814941, 'timestamp': '2025-09-30 22:30:52.292071', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:52.350463', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.1369519978761673, 'timestamp': '2025-09-30 22:30:52.357021', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:52.414226', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.16466499865055084, 'timestamp': '2025-09-30 22:30:52.417303', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:52.480525', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.06392361968755722, 'timestamp': '2025-09-30 22:30:52.483993', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:52.542512', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.14440922439098358, 'timestamp': '2025-09-30 22:30:52.555422', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:52.617340', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.11546164751052856, 'timestamp': '2025-09-30 22:30:52.626845', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:52.684123', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.11091821640729904, 'timestamp': '2025-09-30 22:30:52.692237', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:52.751703', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.10426298528909683, 'timestamp': '2025-09-30 22:30:52.754328', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:52.813479', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.07534130662679672, 'timestamp': '2025-09-30 22:30:52.818332', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:52.878840', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.0965542271733284, 'timestamp': '2025-09-30 22:30:52.885189', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:52.942583', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.11536174267530441, 'timestamp': '2025-09-30 22:30:52.946005', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:53.019461', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.10822796076536179, 'timestamp': '2025-09-30 22:30:53.022267', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.079931', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.15156304836273193, 'timestamp': '2025-09-30 22:30:53.087777', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.146758', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.08418522775173187, 'timestamp': '2025-09-30 22:30:53.153692', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.212961', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.25633159279823303, 'timestamp': '2025-09-30 22:30:53.217085', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.276781', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.1120549812912941, 'timestamp': '2025-09-30 22:30:53.279536', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:53.337833', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.08706635236740112, 'timestamp': '2025-09-30 22:30:53.343170', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:53.403604', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.07729838043451309, 'timestamp': '2025-09-30 22:30:53.414465', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:53.480600', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.06698571890592575, 'timestamp': '2025-09-30 22:30:53.483590', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:53.542413', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.06672505289316177, 'timestamp': '2025-09-30 22:30:53.547586', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:53.610384', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.101730115711689, 'timestamp': '2025-09-30 22:30:53.614364', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.673948', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.21788138151168823, 'timestamp': '2025-09-30 22:30:53.680756', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.739224', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.1495385617017746, 'timestamp': '2025-09-30 22:30:53.755435', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:53.813928', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.11912782490253448, 'timestamp': '2025-09-30 22:30:53.817468', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:53.877064', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.0740494579076767, 'timestamp': '2025-09-30 22:30:53.880123', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:53.937340', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.10826998949050903, 'timestamp': '2025-09-30 22:30:53.943136', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:54.001734', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.2792564034461975, 'timestamp': '2025-09-30 22:30:54.006246', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:54.069961', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.15598291158676147, 'timestamp': '2025-09-30 22:30:54.073260', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:30:54.132893', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.1252499222755432, 'timestamp': '2025-09-30 22:30:54.139242', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:54.222744', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.06687507033348083, 'timestamp': '2025-09-30 22:30:54.231499', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:54.323259', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.16596491634845734, 'timestamp': '2025-09-30 22:30:54.326108', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:54.394957', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.13613443076610565, 'timestamp': '2025-09-30 22:30:54.399515', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:54.493838', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.09298183023929596, 'timestamp': '2025-09-30 22:30:54.505809', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:54.588446', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.13315892219543457, 'timestamp': '2025-09-30 22:30:54.597755', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:54.673646', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.15274640917778015, 'timestamp': '2025-09-30 22:30:54.680750', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:54.741385', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.09160049259662628, 'timestamp': '2025-09-30 22:30:54.745830', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:54.811555', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.10796669870615005, 'timestamp': '2025-09-30 22:30:54.817667', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:54.901660', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.10452333092689514, 'timestamp': '2025-09-30 22:30:54.908348', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:55.016936', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.18337082862854004, 'timestamp': '2025-09-30 22:30:55.022656', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:55.102630', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.15981228649616241, 'timestamp': '2025-09-30 22:30:55.105346', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:55.173687', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.23663638532161713, 'timestamp': '2025-09-30 22:30:55.176571', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:55.235702', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.1782362461090088, 'timestamp': '2025-09-30 22:30:55.244879', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:55.312757', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.13982750475406647, 'timestamp': '2025-09-30 22:30:55.316241', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:30:55.379478', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.12165390700101852, 'timestamp': '2025-09-30 22:30:55.383082', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:55.441689', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.05243714153766632, 'timestamp': '2025-09-30 22:30:55.445337', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:55.505526', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.15394309163093567, 'timestamp': '2025-09-30 22:30:55.513309', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:55.582652', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.12333370745182037, 'timestamp': '2025-09-30 22:30:55.586801', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:55.646805', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.1009339690208435, 'timestamp': '2025-09-30 22:30:55.654413', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:55.731263', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.15170639753341675, 'timestamp': '2025-09-30 22:30:55.736702', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:55.798395', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.10594310611486435, 'timestamp': '2025-09-30 22:30:55.810836', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:30:55.873365', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.0996650978922844, 'timestamp': '2025-09-30 22:30:55.877188', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:55.941503', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.12175841629505157, 'timestamp': '2025-09-30 22:30:55.946092', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:56.005817', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.23111127316951752, 'timestamp': '2025-09-30 22:30:56.013056', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:30:56.073173', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.06944353878498077, 'timestamp': '2025-09-30 22:30:56.081464', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:56.141341', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.1240742951631546, 'timestamp': '2025-09-30 22:30:56.147633', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:30:56.209382', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.22543907165527344, 'timestamp': '2025-09-30 22:30:56.212456', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:56.281839', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.11799450218677521, 'timestamp': '2025-09-30 22:30:56.286248', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:30:56.346365', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.06871975213289261, 'timestamp': '2025-09-30 22:30:56.361647', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:31:12.690729', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 12450.650619082053, 'timestamp': '2025-09-30 22:31:12.698558', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:12.759607', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.13291287422180176, 'timestamp': '2025-09-30 22:31:12.765310', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:12.824819', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.15463653206825256, 'timestamp': '2025-09-30 22:31:12.827274', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:12.885831', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.0879117101430893, 'timestamp': '2025-09-30 22:31:12.888399', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:12.950843', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.1739002764225006, 'timestamp': '2025-09-30 22:31:12.957366', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.030464', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.07135483622550964, 'timestamp': '2025-09-30 22:31:13.033759', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.092108', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.15386265516281128, 'timestamp': '2025-09-30 22:31:13.102124', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:13.163480', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.13308154046535492, 'timestamp': '2025-09-30 22:31:13.165852', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.242287', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.11302803456783295, 'timestamp': '2025-09-30 22:31:13.248726', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.306125', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.09141594171524048, 'timestamp': '2025-09-30 22:31:13.309230', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.369051', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.15210387110710144, 'timestamp': '2025-09-30 22:31:13.379475', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:13.444448', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.09339544177055359, 'timestamp': '2025-09-30 22:31:13.449690', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.512952', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.16150997579097748, 'timestamp': '2025-09-30 22:31:13.520354', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:13.592064', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.10896877199411392, 'timestamp': '2025-09-30 22:31:13.598791', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:13.676799', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.15625034272670746, 'timestamp': '2025-09-30 22:31:13.683113', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.744072', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.20512837171554565, 'timestamp': '2025-09-30 22:31:13.746593', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:13.813624', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.1748410016298294, 'timestamp': '2025-09-30 22:31:13.819857', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:13.879235', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.15041974186897278, 'timestamp': '2025-09-30 22:31:13.886266', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:13.946170', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.12953391671180725, 'timestamp': '2025-09-30 22:31:13.949781', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:14.029304', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.16121609508991241, 'timestamp': '2025-09-30 22:31:14.038381', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.103391', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.1702372282743454, 'timestamp': '2025-09-30 22:31:14.110929', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:14.187955', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.16742068529129028, 'timestamp': '2025-09-30 22:31:14.194793', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.254841', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.12234906852245331, 'timestamp': '2025-09-30 22:31:14.259688', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.319763', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.1679566651582718, 'timestamp': '2025-09-30 22:31:14.324357', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:14.385442', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.11163786053657532, 'timestamp': '2025-09-30 22:31:14.395150', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:14.455349', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.09058471769094467, 'timestamp': '2025-09-30 22:31:14.457576', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.518329', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.16654394567012787, 'timestamp': '2025-09-30 22:31:14.520467', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:14.581786', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.05870090797543526, 'timestamp': '2025-09-30 22:31:14.586515', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:14.645308', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.17855161428451538, 'timestamp': '2025-09-30 22:31:14.652431', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:14.712538', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.13226225972175598, 'timestamp': '2025-09-30 22:31:14.716393', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.778176', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.16734550893306732, 'timestamp': '2025-09-30 22:31:14.781394', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:14.839758', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.1275249570608139, 'timestamp': '2025-09-30 22:31:14.844150', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.905562', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.13263975083827972, 'timestamp': '2025-09-30 22:31:14.914090', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:14.972701', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.034939926117658615, 'timestamp': '2025-09-30 22:31:14.975996', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:15.038243', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.1071973592042923, 'timestamp': '2025-09-30 22:31:15.041160', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:15.100539', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.15497878193855286, 'timestamp': '2025-09-30 22:31:15.105370', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:15.165882', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.07674331218004227, 'timestamp': '2025-09-30 22:31:15.173949', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:15.242253', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.0884518027305603, 'timestamp': '2025-09-30 22:31:15.245093', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:15.309681', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.13729579746723175, 'timestamp': '2025-09-30 22:31:15.312562', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:15.373860', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.0959819108247757, 'timestamp': '2025-09-30 22:31:15.376730', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:15.435720', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.09914854168891907, 'timestamp': '2025-09-30 22:31:15.442556', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:15.503597', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.07219681143760681, 'timestamp': '2025-09-30 22:31:15.506783', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:15.576145', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.1991576850414276, 'timestamp': '2025-09-30 22:31:15.579443', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:15.639257', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.10872425884008408, 'timestamp': '2025-09-30 22:31:15.643016', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:15.700246', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.236465185880661, 'timestamp': '2025-09-30 22:31:15.708677', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:15.766087', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.11323657631874084, 'timestamp': '2025-09-30 22:31:15.769163', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:15.827032', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.06726693361997604, 'timestamp': '2025-09-30 22:31:15.830043', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:15.888609', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.07460785657167435, 'timestamp': '2025-09-30 22:31:15.902179', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:15.961855', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.1668582409620285, 'timestamp': '2025-09-30 22:31:15.979565', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.037320', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.07569021731615067, 'timestamp': '2025-09-30 22:31:16.040334', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:16.098118', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.10168085992336273, 'timestamp': '2025-09-30 22:31:16.101441', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.160676', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.10814762860536575, 'timestamp': '2025-09-30 22:31:16.163149', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:16.221540', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.12604589760303497, 'timestamp': '2025-09-30 22:31:16.227764', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:16.285989', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.05897153541445732, 'timestamp': '2025-09-30 22:31:16.289322', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.347685', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.09511874616146088, 'timestamp': '2025-09-30 22:31:16.351228', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:16.409825', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.135373055934906, 'timestamp': '2025-09-30 22:31:16.414931', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.474633', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.1346815526485443, 'timestamp': '2025-09-30 22:31:16.484268', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.550899', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.15671129524707794, 'timestamp': '2025-09-30 22:31:16.553505', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:16.620299', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.325302392244339, 'timestamp': '2025-09-30 22:31:16.623902', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:16.688307', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.2069525569677353, 'timestamp': '2025-09-30 22:31:16.695823', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.771895', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.15266698598861694, 'timestamp': '2025-09-30 22:31:16.780597', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.837424', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.1783812940120697, 'timestamp': '2025-09-30 22:31:16.840349', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:16.897814', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.0959930568933487, 'timestamp': '2025-09-30 22:31:16.901490', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:16.958875', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.1378430873155594, 'timestamp': '2025-09-30 22:31:16.962156', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.020583', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.06944601237773895, 'timestamp': '2025-09-30 22:31:17.028467', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.085885', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.18099763989448547, 'timestamp': '2025-09-30 22:31:17.089609', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:17.148437', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.08669347316026688, 'timestamp': '2025-09-30 22:31:17.151044', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.207368', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.09042193740606308, 'timestamp': '2025-09-30 22:31:17.211641', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.270709', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.10436014086008072, 'timestamp': '2025-09-30 22:31:17.283009', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.340050', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.08100751042366028, 'timestamp': '2025-09-30 22:31:17.343059', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.399861', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.2632659077644348, 'timestamp': '2025-09-30 22:31:17.402753', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.460225', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.20083801448345184, 'timestamp': '2025-09-30 22:31:17.465020', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:17.522637', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.1966748982667923, 'timestamp': '2025-09-30 22:31:17.534798', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.592611', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.055755309760570526, 'timestamp': '2025-09-30 22:31:17.595766', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.656478', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.08399605751037598, 'timestamp': '2025-09-30 22:31:17.669628', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.732261', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.12991607189178467, 'timestamp': '2025-09-30 22:31:17.735680', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.797242', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.12234102934598923, 'timestamp': '2025-09-30 22:31:17.805210', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:17.861418', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.12568654119968414, 'timestamp': '2025-09-30 22:31:17.866692', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:17.925397', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.10030503571033478, 'timestamp': '2025-09-30 22:31:17.928998', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:17.989066', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.20453789830207825, 'timestamp': '2025-09-30 22:31:17.995519', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.056197', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.12142106890678406, 'timestamp': '2025-09-30 22:31:18.072733', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.143059', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.09197642654180527, 'timestamp': '2025-09-30 22:31:18.147008', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:18.211579', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.13753977417945862, 'timestamp': '2025-09-30 22:31:18.220657', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.289313', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.11469794064760208, 'timestamp': '2025-09-30 22:31:18.293309', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:18.350962', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.0895337238907814, 'timestamp': '2025-09-30 22:31:18.357669', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:18.420954', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.18228945136070251, 'timestamp': '2025-09-30 22:31:18.432578', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.495050', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.07657898962497711, 'timestamp': '2025-09-30 22:31:18.502394', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.574835', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.11260441690683365, 'timestamp': '2025-09-30 22:31:18.579252', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:18.638191', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.05811535567045212, 'timestamp': '2025-09-30 22:31:18.644771', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:18.700907', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.08076876401901245, 'timestamp': '2025-09-30 22:31:18.703439', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.761039', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.21572183072566986, 'timestamp': '2025-09-30 22:31:18.764976', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:18.827063', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.14381705224514008, 'timestamp': '2025-09-30 22:31:18.830122', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:18.904917', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.06988877058029175, 'timestamp': '2025-09-30 22:31:18.911785', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:18.969007', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.1573222279548645, 'timestamp': '2025-09-30 22:31:18.972443', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:19.030931', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.05657784640789032, 'timestamp': '2025-09-30 22:31:19.033352', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:19.091236', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.14917001128196716, 'timestamp': '2025-09-30 22:31:19.095260', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:19.157853', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.14288976788520813, 'timestamp': '2025-09-30 22:31:19.164564', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.221730', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.20582890510559082, 'timestamp': '2025-09-30 22:31:19.224765', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.283410', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.15611770749092102, 'timestamp': '2025-09-30 22:31:19.298686', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.369555', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.16630928218364716, 'timestamp': '2025-09-30 22:31:19.373968', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.434793', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.15297886729240417, 'timestamp': '2025-09-30 22:31:19.444210', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.502228', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.12920333445072174, 'timestamp': '2025-09-30 22:31:19.507452', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:19.566532', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.09137371182441711, 'timestamp': '2025-09-30 22:31:19.574833', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:19.632283', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.11929338425397873, 'timestamp': '2025-09-30 22:31:19.635395', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:19.705891', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.10031586140394211, 'timestamp': '2025-09-30 22:31:19.713538', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:19.783636', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.15259794890880585, 'timestamp': '2025-09-30 22:31:19.788476', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:19.851553', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.047242291271686554, 'timestamp': '2025-09-30 22:31:19.856074', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:19.914767', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.03950237110257149, 'timestamp': '2025-09-30 22:31:19.918107', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:19.986912', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.10932823270559311, 'timestamp': '2025-09-30 22:31:19.994014', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.052190', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.10176535695791245, 'timestamp': '2025-09-30 22:31:20.055504', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:20.113784', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.2647847533226013, 'timestamp': '2025-09-30 22:31:20.117920', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.176061', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.15896762907505035, 'timestamp': '2025-09-30 22:31:20.181382', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:20.239717', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.16718420386314392, 'timestamp': '2025-09-30 22:31:20.247289', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:20.305305', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.10309663414955139, 'timestamp': '2025-09-30 22:31:20.308388', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:20.366776', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.1640530824661255, 'timestamp': '2025-09-30 22:31:20.370576', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.428606', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.08692704141139984, 'timestamp': '2025-09-30 22:31:20.433166', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.493565', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.06535293906927109, 'timestamp': '2025-09-30 22:31:20.501542', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.568197', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.16098366677761078, 'timestamp': '2025-09-30 22:31:20.577566', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:20.644394', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.17048777639865875, 'timestamp': '2025-09-30 22:31:20.647866', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:20.705251', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.1663587987422943, 'timestamp': '2025-09-30 22:31:20.709197', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.776752', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.0676143616437912, 'timestamp': '2025-09-30 22:31:20.783856', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:20.847720', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.09709417819976807, 'timestamp': '2025-09-30 22:31:20.851255', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:20.911211', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.10407371073961258, 'timestamp': '2025-09-30 22:31:20.924949', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:20.989312', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.12165585160255432, 'timestamp': '2025-09-30 22:31:20.992665', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:21.051521', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.1263779103755951, 'timestamp': '2025-09-30 22:31:21.060830', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:21.121150', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.2295820415019989, 'timestamp': '2025-09-30 22:31:21.124426', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:21.195118', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.19167779386043549, 'timestamp': '2025-09-30 22:31:21.201378', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:21.262142', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.09481147676706314, 'timestamp': '2025-09-30 22:31:21.278361', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.364959', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.19024252891540527, 'timestamp': '2025-09-30 22:31:21.371757', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:21.439705', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.10014046728610992, 'timestamp': '2025-09-30 22:31:21.454826', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.524604', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.1355801224708557, 'timestamp': '2025-09-30 22:31:21.528329', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.587048', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.07421919703483582, 'timestamp': '2025-09-30 22:31:21.600283', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.676101', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.07244925200939178, 'timestamp': '2025-09-30 22:31:21.684329', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:21.763643', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.1415751576423645, 'timestamp': '2025-09-30 22:31:21.779055', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.870234', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.119852714240551, 'timestamp': '2025-09-30 22:31:21.874059', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:21.953724', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.07108136266469955, 'timestamp': '2025-09-30 22:31:21.967378', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:22.033390', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.24007819592952728, 'timestamp': '2025-09-30 22:31:22.051388', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:22.111771', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.2297086864709854, 'timestamp': '2025-09-30 22:31:22.115390', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.185336', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.10944747179746628, 'timestamp': '2025-09-30 22:31:22.189149', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:22.248931', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.11645470559597015, 'timestamp': '2025-09-30 22:31:22.264144', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.333829', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.08166786283254623, 'timestamp': '2025-09-30 22:31:22.342166', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:22.414316', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.13091816008090973, 'timestamp': '2025-09-30 22:31:22.417743', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.488201', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.15491284430027008, 'timestamp': '2025-09-30 22:31:22.492115', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:22.553087', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.1050095409154892, 'timestamp': '2025-09-30 22:31:22.556796', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:22.617320', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.09343890845775604, 'timestamp': '2025-09-30 22:31:22.639776', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:22.699090', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.0935521125793457, 'timestamp': '2025-09-30 22:31:22.711944', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.772216', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.14857567846775055, 'timestamp': '2025-09-30 22:31:22.778821', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.842900', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.16854643821716309, 'timestamp': '2025-09-30 22:31:22.847131', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:22.911822', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.16027063131332397, 'timestamp': '2025-09-30 22:31:22.920154', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:22.980435', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.2406834214925766, 'timestamp': '2025-09-30 22:31:22.995964', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.055279', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.12580479681491852, 'timestamp': '2025-09-30 22:31:23.059772', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.117344', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.06067308411002159, 'timestamp': '2025-09-30 22:31:23.122697', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:23.180974', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.14752422273159027, 'timestamp': '2025-09-30 22:31:23.202305', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:23.260236', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.10353109985589981, 'timestamp': '2025-09-30 22:31:23.264464', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:23.322053', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.16568239033222198, 'timestamp': '2025-09-30 22:31:23.326857', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.387607', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.08598964661359787, 'timestamp': '2025-09-30 22:31:23.392125', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:23.452266', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.15260770916938782, 'timestamp': '2025-09-30 22:31:23.459686', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:23.517928', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.1861356794834137, 'timestamp': '2025-09-30 22:31:23.523985', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.583669', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.14704629778862, 'timestamp': '2025-09-30 22:31:23.587190', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.646116', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.06886033713817596, 'timestamp': '2025-09-30 22:31:23.661304', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:23.719823', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.09824104607105255, 'timestamp': '2025-09-30 22:31:23.734724', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:23.791931', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.08311264961957932, 'timestamp': '2025-09-30 22:31:23.796381', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.856516', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.10305505990982056, 'timestamp': '2025-09-30 22:31:23.860984', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:23.925196', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.06546223908662796, 'timestamp': '2025-09-30 22:31:23.929163', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:23.986775', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.09462346881628036, 'timestamp': '2025-09-30 22:31:23.995446', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:24.063760', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.08652393519878387, 'timestamp': '2025-09-30 22:31:24.068793', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:24.126930', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.17674876749515533, 'timestamp': '2025-09-30 22:31:24.132212', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:24.203525', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.03303208202123642, 'timestamp': '2025-09-30 22:31:24.213385', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:24.273259', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.04715466499328613, 'timestamp': '2025-09-30 22:31:24.292091', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:24.352098', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.056790128350257874, 'timestamp': '2025-09-30 22:31:24.356328', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:24.427579', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.07459628582000732, 'timestamp': '2025-09-30 22:31:24.443113', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:24.502163', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.07914645969867706, 'timestamp': '2025-09-30 22:31:24.507138', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:24.565355', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.05880299583077431, 'timestamp': '2025-09-30 22:31:24.573798', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:24.655687', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.09584940969944, 'timestamp': '2025-09-30 22:31:24.660571', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:24.732514', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.10764759033918381, 'timestamp': '2025-09-30 22:31:24.736957', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:24.804603', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.10736039280891418, 'timestamp': '2025-09-30 22:31:24.809776', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:24.870061', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.15063852071762085, 'timestamp': '2025-09-30 22:31:24.877822', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:24.941734', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.0686291977763176, 'timestamp': '2025-09-30 22:31:24.952686', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:25.022781', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.10928195714950562, 'timestamp': '2025-09-30 22:31:25.032254', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:25.108061', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.0733712837100029, 'timestamp': '2025-09-30 22:31:25.116443', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:25.252181', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.11677521467208862, 'timestamp': '2025-09-30 22:31:25.259477', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:25.343175', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.04124994948506355, 'timestamp': '2025-09-30 22:31:25.361202', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:25.486550', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.11843656748533249, 'timestamp': '2025-09-30 22:31:25.493311', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:25.568208', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.20673829317092896, 'timestamp': '2025-09-30 22:31:25.571647', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:25.651114', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.10215785354375839, 'timestamp': '2025-09-30 22:31:25.657905', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:25.772153', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.13182729482650757, 'timestamp': '2025-09-30 22:31:25.783131', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:25.877287', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.10937481373548508, 'timestamp': '2025-09-30 22:31:25.881220', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:25.950065', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.18112924695014954, 'timestamp': '2025-09-30 22:31:25.953313', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:26.023930', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.09117180854082108, 'timestamp': '2025-09-30 22:31:26.031066', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:26.105746', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.10495705902576447, 'timestamp': '2025-09-30 22:31:26.109379', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:26.182502', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.06335596740245819, 'timestamp': '2025-09-30 22:31:26.185836', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:26.274755', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.15899254381656647, 'timestamp': '2025-09-30 22:31:26.277340', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:26.363792', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.1250167191028595, 'timestamp': '2025-09-30 22:31:26.369958', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:26.426817', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.2124253511428833, 'timestamp': '2025-09-30 22:31:26.439102', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:26.497907', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.10297209769487381, 'timestamp': '2025-09-30 22:31:26.500739', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:26.557331', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.1251038759946823, 'timestamp': '2025-09-30 22:31:26.559895', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:26.618999', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.13396555185317993, 'timestamp': '2025-09-30 22:31:26.636420', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:26.694029', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.09197467565536499, 'timestamp': '2025-09-30 22:31:26.697344', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:26.754420', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.11181136965751648, 'timestamp': '2025-09-30 22:31:26.761522', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:26.819308', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.1379704624414444, 'timestamp': '2025-09-30 22:31:26.824268', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:26.882102', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.1353243887424469, 'timestamp': '2025-09-30 22:31:26.891361', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:26.950285', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.04390852153301239, 'timestamp': '2025-09-30 22:31:26.954374', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:27.014614', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.24455620348453522, 'timestamp': '2025-09-30 22:31:27.028405', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:27.096773', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.1488090306520462, 'timestamp': '2025-09-30 22:31:27.100542', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:27.158158', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.06871886551380157, 'timestamp': '2025-09-30 22:31:27.164778', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:27.223459', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.17542551457881927, 'timestamp': '2025-09-30 22:31:27.226769', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:27.285594', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.12935148179531097, 'timestamp': '2025-09-30 22:31:27.288754', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:27.354399', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.2060391753911972, 'timestamp': '2025-09-30 22:31:27.357776', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:27.425182', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.04064569249749184, 'timestamp': '2025-09-30 22:31:27.433350', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:27.490404', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.09441693872213364, 'timestamp': '2025-09-30 22:31:27.493436', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:31:27.551007', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.1635163426399231, 'timestamp': '2025-09-30 22:31:27.554438', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:27.611940', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.1294611096382141, 'timestamp': '2025-09-30 22:31:27.615635', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:27.682092', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.16587218642234802, 'timestamp': '2025-09-30 22:31:27.690749', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:27.748836', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.25113263726234436, 'timestamp': '2025-09-30 22:31:27.761531', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:27.818884', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.14270541071891785, 'timestamp': '2025-09-30 22:31:27.822460', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:27.881412', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.10528050363063812, 'timestamp': '2025-09-30 22:31:27.885123', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:27.941741', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.08234995603561401, 'timestamp': '2025-09-30 22:31:27.948914', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.005600', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.09607876092195511, 'timestamp': '2025-09-30 22:31:28.008658', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.077153', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.12002821266651154, 'timestamp': '2025-09-30 22:31:28.080292', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.142797', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.13581866025924683, 'timestamp': '2025-09-30 22:31:28.146313', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:28.206751', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.1033974140882492, 'timestamp': '2025-09-30 22:31:28.213795', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:28.270038', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.1398998349905014, 'timestamp': '2025-09-30 22:31:28.282146', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:28.338736', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.07872649282217026, 'timestamp': '2025-09-30 22:31:28.342535', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.400100', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.24581918120384216, 'timestamp': '2025-09-30 22:31:28.402961', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.468043', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.13491877913475037, 'timestamp': '2025-09-30 22:31:28.474300', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.531046', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.1657395213842392, 'timestamp': '2025-09-30 22:31:28.534038', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:28.594784', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.12177030742168427, 'timestamp': '2025-09-30 22:31:28.598198', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:28.654527', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.07395016402006149, 'timestamp': '2025-09-30 22:31:28.657779', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:28.715360', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.14329753816127777, 'timestamp': '2025-09-30 22:31:28.721881', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:28.778239', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.0692100077867508, 'timestamp': '2025-09-30 22:31:28.782261', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:28.840563', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.1469300389289856, 'timestamp': '2025-09-30 22:31:28.844863', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:28.903360', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.12356343120336533, 'timestamp': '2025-09-30 22:31:28.906534', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:28.963228', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.09642817080020905, 'timestamp': '2025-09-30 22:31:28.970266', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:29.050648', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.1611030399799347, 'timestamp': '2025-09-30 22:31:29.054765', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.118473', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.1526150405406952, 'timestamp': '2025-09-30 22:31:29.121673', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:29.180894', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.1606922149658203, 'timestamp': '2025-09-30 22:31:29.183610', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:29.242611', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.0915745422244072, 'timestamp': '2025-09-30 22:31:29.249272', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.307876', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.19007498025894165, 'timestamp': '2025-09-30 22:31:29.312378', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.368904', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.143289253115654, 'timestamp': '2025-09-30 22:31:29.371771', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:29.428455', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.19630298018455505, 'timestamp': '2025-09-30 22:31:29.431784', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:29.489172', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.06560128182172775, 'timestamp': '2025-09-30 22:31:29.496277', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:29.551907', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.11283152550458908, 'timestamp': '2025-09-30 22:31:29.555335', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:29.612740', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06723719090223312, 'timestamp': '2025-09-30 22:31:29.616061', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.681321', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.15682494640350342, 'timestamp': '2025-09-30 22:31:29.684721', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:29.748734', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.15609736740589142, 'timestamp': '2025-09-30 22:31:29.755674', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.812884', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.07667364180088043, 'timestamp': '2025-09-30 22:31:29.817951', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:29.874984', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.16621635854244232, 'timestamp': '2025-09-30 22:31:29.877881', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:29.936972', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.10552136600017548, 'timestamp': '2025-09-30 22:31:29.940938', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.013657', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.055908605456352234, 'timestamp': '2025-09-30 22:31:30.026874', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.097648', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.10938741266727448, 'timestamp': '2025-09-30 22:31:30.100689', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:30.165018', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.11274203658103943, 'timestamp': '2025-09-30 22:31:30.168866', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.226338', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.1062048077583313, 'timestamp': '2025-09-30 22:31:30.235722', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:30.293646', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.11335542052984238, 'timestamp': '2025-09-30 22:31:30.300552', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:30.362098', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.11938420683145523, 'timestamp': '2025-09-30 22:31:30.367184', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:30.427477', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.09081438183784485, 'timestamp': '2025-09-30 22:31:30.435527', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.500205', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.17855031788349152, 'timestamp': '2025-09-30 22:31:30.503743', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.560739', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.16085626184940338, 'timestamp': '2025-09-30 22:31:30.571099', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:30.634419', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.11728447675704956, 'timestamp': '2025-09-30 22:31:30.637270', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.694993', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.16991202533245087, 'timestamp': '2025-09-30 22:31:30.699625', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:30.767825', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.17686539888381958, 'timestamp': '2025-09-30 22:31:30.777248', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.838158', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.09904327243566513, 'timestamp': '2025-09-30 22:31:30.845449', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.911768', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.13435021042823792, 'timestamp': '2025-09-30 22:31:30.915068', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:30.971391', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.0948188379406929, 'timestamp': '2025-09-30 22:31:30.975939', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.035794', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.078889861702919, 'timestamp': '2025-09-30 22:31:31.039421', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:31.097188', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.11362023651599884, 'timestamp': '2025-09-30 22:31:31.104647', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:31.160665', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.10034950077533722, 'timestamp': '2025-09-30 22:31:31.163869', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:31.231762', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.06980430334806442, 'timestamp': '2025-09-30 22:31:31.237879', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:31.317115', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.17074249684810638, 'timestamp': '2025-09-30 22:31:31.322073', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.383299', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.0893552377820015, 'timestamp': '2025-09-30 22:31:31.391634', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.463985', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.1313256174325943, 'timestamp': '2025-09-30 22:31:31.467768', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:31.534031', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.09883846342563629, 'timestamp': '2025-09-30 22:31:31.539041', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:31.597241', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.2514182925224304, 'timestamp': '2025-09-30 22:31:31.600494', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.657343', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.11515367776155472, 'timestamp': '2025-09-30 22:31:31.669874', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:31.734698', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.14282111823558807, 'timestamp': '2025-09-30 22:31:31.739353', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:31.797844', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.10380076617002487, 'timestamp': '2025-09-30 22:31:31.800845', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.861949', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.12307582050561905, 'timestamp': '2025-09-30 22:31:31.869513', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:31.928984', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.12515859305858612, 'timestamp': '2025-09-30 22:31:31.942161', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:31.999165', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.05215764418244362, 'timestamp': '2025-09-30 22:31:32.010390', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:32.067348', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.15709707140922546, 'timestamp': '2025-09-30 22:31:32.072683', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:32.144495', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.11033536493778229, 'timestamp': '2025-09-30 22:31:32.161745', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:32.220564', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.15367074310779572, 'timestamp': '2025-09-30 22:31:32.230821', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:32.288993', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.11986712366342545, 'timestamp': '2025-09-30 22:31:32.308636', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:32.367210', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.11600611358880997, 'timestamp': '2025-09-30 22:31:32.372956', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:32.431870', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.12319769710302353, 'timestamp': '2025-09-30 22:31:32.450897', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:32.511017', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.06736578792333603, 'timestamp': '2025-09-30 22:31:32.527163', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:32.589378', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.10718642920255661, 'timestamp': '2025-09-30 22:31:32.604259', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:32.679633', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.1970217525959015, 'timestamp': '2025-09-30 22:31:32.683756', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:32.749723', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.08275385946035385, 'timestamp': '2025-09-30 22:31:32.754642', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:32.812437', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.25526487827301025, 'timestamp': '2025-09-30 22:31:32.824755', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:32.883334', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.1471944898366928, 'timestamp': '2025-09-30 22:31:32.890198', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:32.946849', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.292786180973053, 'timestamp': '2025-09-30 22:31:32.949547', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:33.009131', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.15853331983089447, 'timestamp': '2025-09-30 22:31:33.011418', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:33.072389', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.12019924074411392, 'timestamp': '2025-09-30 22:31:33.082958', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.144247', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.12529490888118744, 'timestamp': '2025-09-30 22:31:33.147610', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.204405', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.08810193091630936, 'timestamp': '2025-09-30 22:31:33.207017', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:33.263581', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.08200933039188385, 'timestamp': '2025-09-30 22:31:33.266403', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:33.323239', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.10691838711500168, 'timestamp': '2025-09-30 22:31:33.330343', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:33.385423', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.1093377023935318, 'timestamp': '2025-09-30 22:31:33.387707', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.445522', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.11585784703493118, 'timestamp': '2025-09-30 22:31:33.448209', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:33.505336', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.13520175218582153, 'timestamp': '2025-09-30 22:31:33.507477', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:33.563620', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.14411409199237823, 'timestamp': '2025-09-30 22:31:33.569562', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:33.625546', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.10362598299980164, 'timestamp': '2025-09-30 22:31:33.629422', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:33.687833', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.10282497107982635, 'timestamp': '2025-09-30 22:31:33.692571', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.750999', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.15929271280765533, 'timestamp': '2025-09-30 22:31:33.757569', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.817748', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.1174786165356636, 'timestamp': '2025-09-30 22:31:33.824421', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:33.883216', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.09693904221057892, 'timestamp': '2025-09-30 22:31:33.885995', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:33.942829', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.1651396006345749, 'timestamp': '2025-09-30 22:31:33.945690', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.003147', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.06664565205574036, 'timestamp': '2025-09-30 22:31:34.006645', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:34.063713', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.10971266031265259, 'timestamp': '2025-09-30 22:31:34.070720', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:34.126929', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.08473894745111465, 'timestamp': '2025-09-30 22:31:34.130648', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.187115', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.14568229019641876, 'timestamp': '2025-09-30 22:31:34.190729', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:34.248244', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.17319682240486145, 'timestamp': '2025-09-30 22:31:34.251575', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.309800', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.05007154867053032, 'timestamp': '2025-09-30 22:31:34.316016', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:34.374428', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.20989249646663666, 'timestamp': '2025-09-30 22:31:34.378302', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.445416', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.11870840191841125, 'timestamp': '2025-09-30 22:31:34.454706', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.518181', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.14004655182361603, 'timestamp': '2025-09-30 22:31:34.526330', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:34.584167', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.0344407819211483, 'timestamp': '2025-09-30 22:31:34.592278', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:34.649675', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.11879542469978333, 'timestamp': '2025-09-30 22:31:34.661057', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:34.726143', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.14214737713336945, 'timestamp': '2025-09-30 22:31:34.734420', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:34.805434', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.05503411218523979, 'timestamp': '2025-09-30 22:31:34.808908', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:34.864885', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.09735594689846039, 'timestamp': '2025-09-30 22:31:34.871068', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:34.926595', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.12405941635370255, 'timestamp': '2025-09-30 22:31:34.932648', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:34.993419', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.09405507892370224, 'timestamp': '2025-09-30 22:31:34.998657', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:35.058096', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.2360500991344452, 'timestamp': '2025-09-30 22:31:35.062139', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:35.119766', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.10962460935115814, 'timestamp': '2025-09-30 22:31:35.128235', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:35.184263', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.09030324220657349, 'timestamp': '2025-09-30 22:31:35.190594', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:35.251084', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.09367551654577255, 'timestamp': '2025-09-30 22:31:35.257324', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:35.314279', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.07739300280809402, 'timestamp': '2025-09-30 22:31:35.317089', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:35.374116', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.1476202756166458, 'timestamp': '2025-09-30 22:31:35.380322', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:35.435837', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.16806700825691223, 'timestamp': '2025-09-30 22:31:35.442948', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:35.505393', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.1463763266801834, 'timestamp': '2025-09-30 22:31:35.514785', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:35.573381', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.13022752106189728, 'timestamp': '2025-09-30 22:31:35.580458', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:35.643212', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.09456636756658554, 'timestamp': '2025-09-30 22:31:35.654862', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:35.718266', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.05499294027686119, 'timestamp': '2025-09-30 22:31:35.729848', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:35.792703', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.09008611738681793, 'timestamp': '2025-09-30 22:31:35.795591', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:35.852141', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.17919230461120605, 'timestamp': '2025-09-30 22:31:35.854949', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:35.912378', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.16283391416072845, 'timestamp': '2025-09-30 22:31:35.919317', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:35.976249', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.08153238147497177, 'timestamp': '2025-09-30 22:31:35.979222', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:36.036895', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.23476998507976532, 'timestamp': '2025-09-30 22:31:36.039457', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:36.098423', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.1318262815475464, 'timestamp': '2025-09-30 22:31:36.101088', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:36.157085', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.09989558160305023, 'timestamp': '2025-09-30 22:31:36.169530', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:36.226024', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.10925276577472687, 'timestamp': '2025-09-30 22:31:36.235849', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:36.297696', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.09670325368642807, 'timestamp': '2025-09-30 22:31:36.304182', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:36.365849', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.21405939757823944, 'timestamp': '2025-09-30 22:31:36.370162', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:31:36.429822', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.09415405243635178, 'timestamp': '2025-09-30 22:31:36.438811', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:36.504225', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.10617949813604355, 'timestamp': '2025-09-30 22:31:36.514506', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:36.579294', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.09856059402227402, 'timestamp': '2025-09-30 22:31:36.582183', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:36.639141', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.12641893327236176, 'timestamp': '2025-09-30 22:31:36.646925', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:36.711072', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.09744957834482193, 'timestamp': '2025-09-30 22:31:36.723031', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:36.780216', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.10253764688968658, 'timestamp': '2025-09-30 22:31:36.782760', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:36.839695', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.12281623482704163, 'timestamp': '2025-09-30 22:31:36.842689', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:36.900088', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.08621565252542496, 'timestamp': '2025-09-30 22:31:36.908179', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:36.967829', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.10496658086776733, 'timestamp': '2025-09-30 22:31:36.978390', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:37.035406', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.16154317557811737, 'timestamp': '2025-09-30 22:31:37.038059', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:37.104245', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.14948013424873352, 'timestamp': '2025-09-30 22:31:37.110287', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:37.168855', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.13290074467658997, 'timestamp': '2025-09-30 22:31:37.176507', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:37.233512', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.12598107755184174, 'timestamp': '2025-09-30 22:31:37.245976', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:37.310258', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.12336252629756927, 'timestamp': '2025-09-30 22:31:37.316627', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:37.379043', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.11451666057109833, 'timestamp': '2025-09-30 22:31:37.388833', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:37.450983', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.06666135787963867, 'timestamp': '2025-09-30 22:31:37.454491', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:37.512711', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.09332442283630371, 'timestamp': '2025-09-30 22:31:37.519346', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:37.576652', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.15804293751716614, 'timestamp': '2025-09-30 22:31:37.584144', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:37.649246', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.14220592379570007, 'timestamp': '2025-09-30 22:31:37.662589', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:37.727726', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.10289351642131805, 'timestamp': '2025-09-30 22:31:37.730737', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:37.797968', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.11966858804225922, 'timestamp': '2025-09-30 22:31:37.805311', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:37.878017', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.053051676601171494, 'timestamp': '2025-09-30 22:31:37.881526', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:37.938406', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.12047969549894333, 'timestamp': '2025-09-30 22:31:37.945096', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:38.003520', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.07737402617931366, 'timestamp': '2025-09-30 22:31:38.007156', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:38.064277', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.15109211206436157, 'timestamp': '2025-09-30 22:31:38.075644', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:38.146025', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.09809565544128418, 'timestamp': '2025-09-30 22:31:38.149746', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:38.208997', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.1709863245487213, 'timestamp': '2025-09-30 22:31:38.212329', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:38.274373', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.186521977186203, 'timestamp': '2025-09-30 22:31:38.281953', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:38.341163', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.1585291177034378, 'timestamp': '2025-09-30 22:31:38.348267', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:38.404661', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.13241054117679596, 'timestamp': '2025-09-30 22:31:38.414214', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:38.473574', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.10388504713773727, 'timestamp': '2025-09-30 22:31:38.477427', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:38.537210', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.10405625402927399, 'timestamp': '2025-09-30 22:31:38.544588', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:38.607948', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.10821209847927094, 'timestamp': '2025-09-30 22:31:38.616978', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:38.679178', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.15035481750965118, 'timestamp': '2025-09-30 22:31:38.683303', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:38.752659', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.13030250370502472, 'timestamp': '2025-09-30 22:31:38.761829', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:38.830892', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.09846977889537811, 'timestamp': '2025-09-30 22:31:38.841515', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:38.911731', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.20834694802761078, 'timestamp': '2025-09-30 22:31:38.924796', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:38.991931', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.10253117233514786, 'timestamp': '2025-09-30 22:31:38.995465', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:39.065516', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.13000094890594482, 'timestamp': '2025-09-30 22:31:39.068452', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:39.128138', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.21838445961475372, 'timestamp': '2025-09-30 22:31:39.132088', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:39.191660', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.181820347905159, 'timestamp': '2025-09-30 22:31:39.199150', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-09-30 22:31:39.653366', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:39.714394', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.1326266974210739, 'timestamp': '2025-09-30 22:31:39.717945', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:39.776130', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.1349838525056839, 'timestamp': '2025-09-30 22:31:39.791104', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:39.860240', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.14994119107723236, 'timestamp': '2025-09-30 22:31:39.864214', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:39.923547', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.18538153171539307, 'timestamp': '2025-09-30 22:31:39.931634', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:39.993073', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.13526853919029236, 'timestamp': '2025-09-30 22:31:39.996249', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:40.056676', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.13382692635059357, 'timestamp': '2025-09-30 22:31:40.063165', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:40.124249', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.16029009222984314, 'timestamp': '2025-09-30 22:31:40.127108', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:40.184374', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.09187109768390656, 'timestamp': '2025-09-30 22:31:40.192105', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:31:40.249535', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.07778061181306839, 'timestamp': '2025-09-30 22:31:40.252793', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:40.310845', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.11738558113574982, 'timestamp': '2025-09-30 22:31:40.325952', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:40.396678', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.13784131407737732, 'timestamp': '2025-09-30 22:31:40.400469', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:40.459564', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.14697031676769257, 'timestamp': '2025-09-30 22:31:40.468355', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:40.527322', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.11891712993383408, 'timestamp': '2025-09-30 22:31:40.533426', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:40.594386', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.11782050132751465, 'timestamp': '2025-09-30 22:31:40.612813', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:40.671168', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.14656789600849152, 'timestamp': '2025-09-30 22:31:40.676726', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:40.736783', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.06993433088064194, 'timestamp': '2025-09-30 22:31:40.747024', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:40.806081', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.24356548488140106, 'timestamp': '2025-09-30 22:31:40.811632', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:40.871745', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.06500374525785446, 'timestamp': '2025-09-30 22:31:40.877930', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:40.936293', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.1053333580493927, 'timestamp': '2025-09-30 22:31:40.942571', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:41.000976', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.056274864822626114, 'timestamp': '2025-09-30 22:31:41.016797', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:41.077909', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.07880805432796478, 'timestamp': '2025-09-30 22:31:41.082027', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.142300', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.07482670247554779, 'timestamp': '2025-09-30 22:31:41.153956', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.212150', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.0809992179274559, 'timestamp': '2025-09-30 22:31:41.216011', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.275046', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.19338391721248627, 'timestamp': '2025-09-30 22:31:41.282372', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.352652', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.15313434600830078, 'timestamp': '2025-09-30 22:31:41.356029', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:41.414851', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.17449000477790833, 'timestamp': '2025-09-30 22:31:41.418345', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.476088', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.13089971244335175, 'timestamp': '2025-09-30 22:31:41.480088', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:41.538465', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.06840889155864716, 'timestamp': '2025-09-30 22:31:41.545364', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:41.613670', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.13198287785053253, 'timestamp': '2025-09-30 22:31:41.616339', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:41.675045', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.1818229705095291, 'timestamp': '2025-09-30 22:31:41.678404', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.736861', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.14263486862182617, 'timestamp': '2025-09-30 22:31:41.741751', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:41.800937', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.024254951626062393, 'timestamp': '2025-09-30 22:31:41.820291', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:41.892988', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.07680099457502365, 'timestamp': '2025-09-30 22:31:41.898806', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:41.972987', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.15252842009067535, 'timestamp': '2025-09-30 22:31:41.978417', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:42.039627', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.106077179312706, 'timestamp': '2025-09-30 22:31:42.043570', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:42.113132', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.13960516452789307, 'timestamp': '2025-09-30 22:31:42.119587', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:42.178012', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.07847191393375397, 'timestamp': '2025-09-30 22:31:42.181476', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:42.239550', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.12104873359203339, 'timestamp': '2025-09-30 22:31:42.243599', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:42.338671', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.11321402341127396, 'timestamp': '2025-09-30 22:31:42.347824', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:42.425001', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.09361210465431213, 'timestamp': '2025-09-30 22:31:42.443637', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:42.509916', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.14018514752388, 'timestamp': '2025-09-30 22:31:42.513476', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:42.584533', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.23264721035957336, 'timestamp': '2025-09-30 22:31:42.599441', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:42.673199', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.08782273530960083, 'timestamp': '2025-09-30 22:31:42.677461', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:42.762532', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.07236301898956299, 'timestamp': '2025-09-30 22:31:42.770205', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:42.828454', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.09637599438428879, 'timestamp': '2025-09-30 22:31:42.832042', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:42.897331', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.08483058959245682, 'timestamp': '2025-09-30 22:31:42.903675', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:42.963417', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.2004375159740448, 'timestamp': '2025-09-30 22:31:42.969335', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.047375', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.11149464547634125, 'timestamp': '2025-09-30 22:31:43.055087', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:43.152749', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.08180420845746994, 'timestamp': '2025-09-30 22:31:43.157164', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:43.216245', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.20535454154014587, 'timestamp': '2025-09-30 22:31:43.220170', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:43.289953', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.19531333446502686, 'timestamp': '2025-09-30 22:31:43.302593', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:43.378563', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.20753343403339386, 'timestamp': '2025-09-30 22:31:43.389055', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.527222', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.06745943427085876, 'timestamp': '2025-09-30 22:31:43.531146', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.608529', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.1195085272192955, 'timestamp': '2025-09-30 22:31:43.612337', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.673265', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.18460609018802643, 'timestamp': '2025-09-30 22:31:43.680689', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.741337', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.18309497833251953, 'timestamp': '2025-09-30 22:31:43.758814', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.830506', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.1081196665763855, 'timestamp': '2025-09-30 22:31:43.835414', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:43.894412', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.09875579178333282, 'timestamp': '2025-09-30 22:31:43.898723', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:43.956704', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.16532428562641144, 'timestamp': '2025-09-30 22:31:43.960361', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:44.027764', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.1499125212430954, 'timestamp': '2025-09-30 22:31:44.035398', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:44.093714', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.07134594768285751, 'timestamp': '2025-09-30 22:31:44.097326', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:44.154259', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.19047337770462036, 'timestamp': '2025-09-30 22:31:44.159645', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.218116', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.22401635348796844, 'timestamp': '2025-09-30 22:31:44.222025', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.290686', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.16122844815254211, 'timestamp': '2025-09-30 22:31:44.298857', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:44.357097', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.14085766673088074, 'timestamp': '2025-09-30 22:31:44.373595', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.432068', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.09831379354000092, 'timestamp': '2025-09-30 22:31:44.436921', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:44.494812', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.13601428270339966, 'timestamp': '2025-09-30 22:31:44.498654', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:44.558049', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.12455142289400101, 'timestamp': '2025-09-30 22:31:44.567655', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.624757', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.20313337445259094, 'timestamp': '2025-09-30 22:31:44.628489', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:44.696913', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.20297537744045258, 'timestamp': '2025-09-30 22:31:44.700542', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.760846', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.1173185482621193, 'timestamp': '2025-09-30 22:31:44.774480', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:44.858435', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.09294954687356949, 'timestamp': '2025-09-30 22:31:44.874432', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:44.943931', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.12376753985881805, 'timestamp': '2025-09-30 22:31:44.947117', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:31:45.006604', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.18826457858085632, 'timestamp': '2025-09-30 22:31:45.019973', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:45.108292', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.09502297639846802, 'timestamp': '2025-09-30 22:31:45.118499', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:45.189909', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.127254456281662, 'timestamp': '2025-09-30 22:31:45.199342', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:31:45.267026', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.10511599481105804, 'timestamp': '2025-09-30 22:31:45.284672', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:45.346852', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.17995718121528625, 'timestamp': '2025-09-30 22:31:45.360034', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:45.428795', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.1514519304037094, 'timestamp': '2025-09-30 22:31:45.433917', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:45.520325', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.19380833208560944, 'timestamp': '2025-09-30 22:31:45.539518', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:45.609997', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.12244642525911331, 'timestamp': '2025-09-30 22:31:45.623996', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:45.692656', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.1598721742630005, 'timestamp': '2025-09-30 22:31:45.708951', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:31:59.694567', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 13554.87929418308, 'timestamp': '2025-09-30 22:31:59.714272', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:31:59.773474', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.11039621382951736, 'timestamp': '2025-09-30 22:31:59.789985', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:31:59.850615', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.10141162574291229, 'timestamp': '2025-09-30 22:31:59.861369', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:31:59.921579', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.09892769157886505, 'timestamp': '2025-09-30 22:31:59.925813', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:31:59.985909', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.13289488852024078, 'timestamp': '2025-09-30 22:31:59.990777', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:00.049301', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.09378055483102798, 'timestamp': '2025-09-30 22:32:00.052258', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.117927', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.19017699360847473, 'timestamp': '2025-09-30 22:32:00.125853', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.183385', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.084470734000206, 'timestamp': '2025-09-30 22:32:00.187091', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:00.245762', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.11015581339597702, 'timestamp': '2025-09-30 22:32:00.249760', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.307074', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.05632622167468071, 'timestamp': '2025-09-30 22:32:00.310683', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.368608', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.08991119265556335, 'timestamp': '2025-09-30 22:32:00.375951', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:00.434342', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.1259666532278061, 'timestamp': '2025-09-30 22:32:00.437802', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:00.495589', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.09427230805158615, 'timestamp': '2025-09-30 22:32:00.499893', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:00.558698', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.1435316652059555, 'timestamp': '2025-09-30 22:32:00.563267', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.622212', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.14560699462890625, 'timestamp': '2025-09-30 22:32:00.628991', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.686336', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.0937158614397049, 'timestamp': '2025-09-30 22:32:00.698943', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:00.768816', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.1878790408372879, 'timestamp': '2025-09-30 22:32:00.772826', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:00.837961', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.20053350925445557, 'timestamp': '2025-09-30 22:32:00.842282', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:00.899653', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.1331576555967331, 'timestamp': '2025-09-30 22:32:00.906591', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:00.969765', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.13436885178089142, 'timestamp': '2025-09-30 22:32:00.972890', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:01.031097', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.12172261625528336, 'timestamp': '2025-09-30 22:32:01.035070', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:01.100270', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.07272541522979736, 'timestamp': '2025-09-30 22:32:01.110269', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:01.168167', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.11430744081735611, 'timestamp': '2025-09-30 22:32:01.175318', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:01.234019', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.08889706432819366, 'timestamp': '2025-09-30 22:32:01.237572', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:01.297580', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.12895648181438446, 'timestamp': '2025-09-30 22:32:01.302092', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:01.359853', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.0910659208893776, 'timestamp': '2025-09-30 22:32:01.363404', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:01.420847', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.1891433149576187, 'timestamp': '2025-09-30 22:32:01.427645', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:01.501193', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.027499573305249214, 'timestamp': '2025-09-30 22:32:01.504935', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:01.563072', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.1531221866607666, 'timestamp': '2025-09-30 22:32:01.566186', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:01.623398', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.07143554836511612, 'timestamp': '2025-09-30 22:32:01.636870', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:01.695068', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.11333911865949631, 'timestamp': '2025-09-30 22:32:01.709248', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:01.767539', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.12977544963359833, 'timestamp': '2025-09-30 22:32:01.772729', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:01.830741', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.14743325114250183, 'timestamp': '2025-09-30 22:32:01.833908', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:01.892997', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.13016322255134583, 'timestamp': '2025-09-30 22:32:01.896724', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:01.955704', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.08597962558269501, 'timestamp': '2025-09-30 22:32:01.962852', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.022412', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.1073029562830925, 'timestamp': '2025-09-30 22:32:02.029579', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.091999', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.09700952470302582, 'timestamp': '2025-09-30 22:32:02.100768', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.158255', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.12078795582056046, 'timestamp': '2025-09-30 22:32:02.162145', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.220066', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.14690114557743073, 'timestamp': '2025-09-30 22:32:02.227897', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.284655', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.06823993474245071, 'timestamp': '2025-09-30 22:32:02.296011', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.355104', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.13067004084587097, 'timestamp': '2025-09-30 22:32:02.360382', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:02.423335', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.170597106218338, 'timestamp': '2025-09-30 22:32:02.426934', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:02.485833', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.15365560352802277, 'timestamp': '2025-09-30 22:32:02.500904', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:02.557477', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.1712125688791275, 'timestamp': '2025-09-30 22:32:02.561379', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:02.619351', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.09303624927997589, 'timestamp': '2025-09-30 22:32:02.630352', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.694431', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.17169471085071564, 'timestamp': '2025-09-30 22:32:02.699279', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:02.757479', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.08902102708816528, 'timestamp': '2025-09-30 22:32:02.764640', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:02.821466', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.19395004212856293, 'timestamp': '2025-09-30 22:32:02.824795', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:02.882505', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.12371909618377686, 'timestamp': '2025-09-30 22:32:02.890008', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:02.947390', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.17234665155410767, 'timestamp': '2025-09-30 22:32:02.950813', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.008262', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.1677900105714798, 'timestamp': '2025-09-30 22:32:03.015119', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:03.080537', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.043741609901189804, 'timestamp': '2025-09-30 22:32:03.096930', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.156183', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.10362811386585236, 'timestamp': '2025-09-30 22:32:03.159651', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:03.237140', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.13863398134708405, 'timestamp': '2025-09-30 22:32:03.241748', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.301650', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.0772007629275322, 'timestamp': '2025-09-30 22:32:03.308662', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:32:03.365516', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.09108676761388779, 'timestamp': '2025-09-30 22:32:03.368960', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:03.435314', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.05314227193593979, 'timestamp': '2025-09-30 22:32:03.438750', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:03.496669', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.10918281972408295, 'timestamp': '2025-09-30 22:32:03.499903', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:03.566566', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.148483544588089, 'timestamp': '2025-09-30 22:32:03.574466', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.632961', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.11433272063732147, 'timestamp': '2025-09-30 22:32:03.643220', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:03.708628', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.15278135240077972, 'timestamp': '2025-09-30 22:32:03.719652', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.777370', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.1131383627653122, 'timestamp': '2025-09-30 22:32:03.787515', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.854163', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.14316041767597198, 'timestamp': '2025-09-30 22:32:03.861765', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:03.926065', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.17133313417434692, 'timestamp': '2025-09-30 22:32:03.928982', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:03.988116', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.149566188454628, 'timestamp': '2025-09-30 22:32:03.998350', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.060376', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.10874922573566437, 'timestamp': '2025-09-30 22:32:04.066388', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.127218', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.18524670600891113, 'timestamp': '2025-09-30 22:32:04.133308', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.198899', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.15963253378868103, 'timestamp': '2025-09-30 22:32:04.203199', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:04.261271', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.10653677582740784, 'timestamp': '2025-09-30 22:32:04.265447', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.325157', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.10384690016508102, 'timestamp': '2025-09-30 22:32:04.328950', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:04.398842', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.15834011137485504, 'timestamp': '2025-09-30 22:32:04.408314', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:04.466398', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.0952746793627739, 'timestamp': '2025-09-30 22:32:04.469058', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.528658', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.10157155245542526, 'timestamp': '2025-09-30 22:32:04.531735', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:04.594477', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.09540607035160065, 'timestamp': '2025-09-30 22:32:04.597804', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.665080', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.10362300276756287, 'timestamp': '2025-09-30 22:32:04.671570', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:04.744256', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.20338886976242065, 'timestamp': '2025-09-30 22:32:04.747290', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:04.814537', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.14952607452869415, 'timestamp': '2025-09-30 22:32:04.820445', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:04.881426', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.11973368376493454, 'timestamp': '2025-09-30 22:32:04.886615', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:04.950137', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.19213084876537323, 'timestamp': '2025-09-30 22:32:04.963598', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:05.026934', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.20337536931037903, 'timestamp': '2025-09-30 22:32:05.031709', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.090667', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.15034301578998566, 'timestamp': '2025-09-30 22:32:05.101667', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:05.160512', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.11429433524608612, 'timestamp': '2025-09-30 22:32:05.164772', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:05.223875', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.11381370574235916, 'timestamp': '2025-09-30 22:32:05.230603', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.288329', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.09548220783472061, 'timestamp': '2025-09-30 22:32:05.298372', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:05.367586', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.09296220541000366, 'timestamp': '2025-09-30 22:32:05.376315', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:05.434148', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.11150194704532623, 'timestamp': '2025-09-30 22:32:05.444968', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:05.515462', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.14020241796970367, 'timestamp': '2025-09-30 22:32:05.522750', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.580520', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.13707242906093597, 'timestamp': '2025-09-30 22:32:05.584391', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.643072', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.13610535860061646, 'timestamp': '2025-09-30 22:32:05.648792', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:05.730114', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.060742657631635666, 'timestamp': '2025-09-30 22:32:05.733796', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.793741', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.14455105364322662, 'timestamp': '2025-09-30 22:32:05.802129', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:05.859519', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.08553628623485565, 'timestamp': '2025-09-30 22:32:05.876260', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:05.941423', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.19999343156814575, 'timestamp': '2025-09-30 22:32:05.945336', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.003584', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.08067724108695984, 'timestamp': '2025-09-30 22:32:06.007888', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:06.065628', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.12049504369497299, 'timestamp': '2025-09-30 22:32:06.073494', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:06.130641', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.1072382926940918, 'timestamp': '2025-09-30 22:32:06.134930', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.195081', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.1350565254688263, 'timestamp': '2025-09-30 22:32:06.201602', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:06.260553', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.09196153283119202, 'timestamp': '2025-09-30 22:32:06.264881', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.322558', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.13414672017097473, 'timestamp': '2025-09-30 22:32:06.332956', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:06.392663', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.248888298869133, 'timestamp': '2025-09-30 22:32:06.396925', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.455559', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.22563035786151886, 'timestamp': '2025-09-30 22:32:06.463852', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:06.537434', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.06610995531082153, 'timestamp': '2025-09-30 22:32:06.541096', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:06.598792', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.15159237384796143, 'timestamp': '2025-09-30 22:32:06.606187', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.663884', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.07864320278167725, 'timestamp': '2025-09-30 22:32:06.667567', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:06.725213', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.09672896564006805, 'timestamp': '2025-09-30 22:32:06.737120', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:06.804147', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.15921635925769806, 'timestamp': '2025-09-30 22:32:06.808041', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:06.879577', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.10030055791139603, 'timestamp': '2025-09-30 22:32:06.893059', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:06.952564', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.1394098699092865, 'timestamp': '2025-09-30 22:32:06.958139', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.031838', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.13586623966693878, 'timestamp': '2025-09-30 22:32:07.035913', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:07.103754', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.05950598046183586, 'timestamp': '2025-09-30 22:32:07.106931', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:07.163921', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.1230015903711319, 'timestamp': '2025-09-30 22:32:07.174043', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.236435', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.07433634251356125, 'timestamp': '2025-09-30 22:32:07.249097', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:07.316101', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.21862183511257172, 'timestamp': '2025-09-30 22:32:07.325214', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.382947', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.03651147335767746, 'timestamp': '2025-09-30 22:32:07.393755', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:07.460976', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.09381269663572311, 'timestamp': '2025-09-30 22:32:07.467254', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:07.533422', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.08124098181724548, 'timestamp': '2025-09-30 22:32:07.545335', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:07.603855', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.18744923174381256, 'timestamp': '2025-09-30 22:32:07.609164', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.671743', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.11072181165218353, 'timestamp': '2025-09-30 22:32:07.677048', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:07.735349', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.17485809326171875, 'timestamp': '2025-09-30 22:32:07.742207', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:07.822809', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.0994345098733902, 'timestamp': '2025-09-30 22:32:07.826267', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.884207', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.12087256461381912, 'timestamp': '2025-09-30 22:32:07.887671', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:07.955738', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.07644831389188766, 'timestamp': '2025-09-30 22:32:07.960645', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:08.017686', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.1344520002603531, 'timestamp': '2025-09-30 22:32:08.024628', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:08.093723', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.05450856685638428, 'timestamp': '2025-09-30 22:32:08.097621', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.174740', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.09791044145822525, 'timestamp': '2025-09-30 22:32:08.193923', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:08.259819', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.12608398497104645, 'timestamp': '2025-09-30 22:32:08.263570', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:08.322641', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.09560027718544006, 'timestamp': '2025-09-30 22:32:08.329531', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.388469', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.10257010161876678, 'timestamp': '2025-09-30 22:32:08.391855', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:08.460690', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.15997843444347382, 'timestamp': '2025-09-30 22:32:08.473819', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.532200', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.08595901727676392, 'timestamp': '2025-09-30 22:32:08.538016', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:08.597399', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.16093917191028595, 'timestamp': '2025-09-30 22:32:08.612157', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.670016', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.1620481163263321, 'timestamp': '2025-09-30 22:32:08.680191', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.745777', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.17154622077941895, 'timestamp': '2025-09-30 22:32:08.748635', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:08.813884', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.131280317902565, 'timestamp': '2025-09-30 22:32:08.826627', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:08.885443', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.12449027597904205, 'timestamp': '2025-09-30 22:32:08.892636', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:08.966722', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.19126158952713013, 'timestamp': '2025-09-30 22:32:08.969882', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:09.027352', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.09704890102148056, 'timestamp': '2025-09-30 22:32:09.037890', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:09.099259', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.047419533133506775, 'timestamp': '2025-09-30 22:32:09.103052', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.160335', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.06967414170503616, 'timestamp': '2025-09-30 22:32:09.167482', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:09.224704', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.05666537955403328, 'timestamp': '2025-09-30 22:32:09.236450', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.301857', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.06432656198740005, 'timestamp': '2025-09-30 22:32:09.306280', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:09.372339', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.0613676942884922, 'timestamp': '2025-09-30 22:32:09.375414', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:09.433451', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.08411059528589249, 'timestamp': '2025-09-30 22:32:09.440344', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:09.497064', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.2132706642150879, 'timestamp': '2025-09-30 22:32:09.511229', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:09.570125', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.1594625860452652, 'timestamp': '2025-09-30 22:32:09.573644', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:09.633268', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.12029174715280533, 'timestamp': '2025-09-30 22:32:09.637498', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.698850', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.048471849411726, 'timestamp': '2025-09-30 22:32:09.716666', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:09.786104', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.14981529116630554, 'timestamp': '2025-09-30 22:32:09.790877', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.850511', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.1082020029425621, 'timestamp': '2025-09-30 22:32:09.854065', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.913340', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.13617222011089325, 'timestamp': '2025-09-30 22:32:09.928276', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:09.997323', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.06403100490570068, 'timestamp': '2025-09-30 22:32:10.005321', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:10.064138', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.16541075706481934, 'timestamp': '2025-09-30 22:32:10.077531', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:10.135756', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.2239728718996048, 'timestamp': '2025-09-30 22:32:10.140161', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:10.199313', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.16322624683380127, 'timestamp': '2025-09-30 22:32:10.204566', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:10.264542', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.10357265919446945, 'timestamp': '2025-09-30 22:32:10.274336', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:10.333319', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.06764940172433853, 'timestamp': '2025-09-30 22:32:10.337852', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:10.396792', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.10873470455408096, 'timestamp': '2025-09-30 22:32:10.416745', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:10.475924', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.22493495047092438, 'timestamp': '2025-09-30 22:32:10.479981', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:10.548633', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.08828115463256836, 'timestamp': '2025-09-30 22:32:10.556258', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:10.613305', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.17017444968223572, 'timestamp': '2025-09-30 22:32:10.630906', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:10.691223', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.17874188721179962, 'timestamp': '2025-09-30 22:32:10.696663', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:10.762201', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.11143461614847183, 'timestamp': '2025-09-30 22:32:10.766509', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:10.835910', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.09920145571231842, 'timestamp': '2025-09-30 22:32:10.843015', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:10.903167', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.15333525836467743, 'timestamp': '2025-09-30 22:32:10.907866', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:10.967880', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.28094449639320374, 'timestamp': '2025-09-30 22:32:10.971801', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:11.031789', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.10234782844781876, 'timestamp': '2025-09-30 22:32:11.035765', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:11.100773', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.10224756598472595, 'timestamp': '2025-09-30 22:32:11.117752', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:11.175851', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.1864997148513794, 'timestamp': '2025-09-30 22:32:11.190367', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:11.251694', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.10678749531507492, 'timestamp': '2025-09-30 22:32:11.257502', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:11.319073', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.18135584890842438, 'timestamp': '2025-09-30 22:32:11.323084', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:11.383116', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.1451752781867981, 'timestamp': '2025-09-30 22:32:11.391117', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:11.449224', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.20909802615642548, 'timestamp': '2025-09-30 22:32:11.464373', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:11.522594', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.0790054202079773, 'timestamp': '2025-09-30 22:32:11.527753', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:11.598349', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.0908265933394432, 'timestamp': '2025-09-30 22:32:11.603348', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:11.676479', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.11755312234163284, 'timestamp': '2025-09-30 22:32:11.686149', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:11.747748', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.145539790391922, 'timestamp': '2025-09-30 22:32:11.751951', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:11.811229', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.15815065801143646, 'timestamp': '2025-09-30 22:32:11.814899', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:11.873691', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.12336715310811996, 'timestamp': '2025-09-30 22:32:11.886636', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:11.957959', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.11427918821573257, 'timestamp': '2025-09-30 22:32:11.966702', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.025053', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.1065676286816597, 'timestamp': '2025-09-30 22:32:12.030492', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:12.100317', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.14208614826202393, 'timestamp': '2025-09-30 22:32:12.104106', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.162385', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.17174096405506134, 'timestamp': '2025-09-30 22:32:12.166983', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.235841', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.05278845131397247, 'timestamp': '2025-09-30 22:32:12.242422', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.310558', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.07589122653007507, 'timestamp': '2025-09-30 22:32:12.313772', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:12.371089', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.1273791640996933, 'timestamp': '2025-09-30 22:32:12.375113', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.433531', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.09503497928380966, 'timestamp': '2025-09-30 22:32:12.450391', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:12.521714', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.11916189640760422, 'timestamp': '2025-09-30 22:32:12.529498', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.600919', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.131818026304245, 'timestamp': '2025-09-30 22:32:12.617780', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:12.677657', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.11717721074819565, 'timestamp': '2025-09-30 22:32:12.682891', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.754632', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.06345605105161667, 'timestamp': '2025-09-30 22:32:12.768873', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:12.840187', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.09645544737577438, 'timestamp': '2025-09-30 22:32:12.857538', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:12.916755', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.16216014325618744, 'timestamp': '2025-09-30 22:32:12.920622', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:12.979342', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.24308432638645172, 'timestamp': '2025-09-30 22:32:12.984815', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.043833', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.1109650582075119, 'timestamp': '2025-09-30 22:32:13.048379', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.106440', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.1868627965450287, 'timestamp': '2025-09-30 22:32:13.114600', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:13.174880', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.09059227257966995, 'timestamp': '2025-09-30 22:32:13.178857', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:13.239342', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.07442762702703476, 'timestamp': '2025-09-30 22:32:13.259110', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:13.345808', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.10671572387218475, 'timestamp': '2025-09-30 22:32:13.350942', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:13.410105', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.12614980340003967, 'timestamp': '2025-09-30 22:32:13.419270', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.495738', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.08368448168039322, 'timestamp': '2025-09-30 22:32:13.499572', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:13.567757', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.08124647289514542, 'timestamp': '2025-09-30 22:32:13.581386', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.640818', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.10697140544652939, 'timestamp': '2025-09-30 22:32:13.645495', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.723224', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.21504029631614685, 'timestamp': '2025-09-30 22:32:13.731864', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:13.793100', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.13000807166099548, 'timestamp': '2025-09-30 22:32:13.798691', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:13.870369', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.07773932069540024, 'timestamp': '2025-09-30 22:32:13.874105', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:13.945668', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.11649546772241592, 'timestamp': '2025-09-30 22:32:13.950213', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.010693', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.10032039880752563, 'timestamp': '2025-09-30 22:32:14.018722', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:14.090779', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.1607765406370163, 'timestamp': '2025-09-30 22:32:14.109081', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:32:14.169457', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.10193867981433868, 'timestamp': '2025-09-30 22:32:14.174307', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.235837', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.14093053340911865, 'timestamp': '2025-09-30 22:32:14.251456', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:14.311631', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.16285161674022675, 'timestamp': '2025-09-30 22:32:14.319043', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.379217', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.1521649807691574, 'timestamp': '2025-09-30 22:32:14.383822', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:14.444078', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.06131415814161301, 'timestamp': '2025-09-30 22:32:14.448303', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.508011', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.2237733006477356, 'timestamp': '2025-09-30 22:32:14.511513', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:14.570757', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.1471906453371048, 'timestamp': '2025-09-30 22:32:14.578850', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.637722', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.10095688700675964, 'timestamp': '2025-09-30 22:32:14.643106', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.703643', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.15470905601978302, 'timestamp': '2025-09-30 22:32:14.709923', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:14.771681', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.14379726350307465, 'timestamp': '2025-09-30 22:32:14.778466', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:14.838770', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.0955825224518776, 'timestamp': '2025-09-30 22:32:14.846033', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:14.926219', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.06552083045244217, 'timestamp': '2025-09-30 22:32:14.938982', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:14.996708', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.0283610038459301, 'timestamp': '2025-09-30 22:32:15.000737', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:15.059458', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.1840805560350418, 'timestamp': '2025-09-30 22:32:15.076136', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:15.135057', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.14552977681159973, 'timestamp': '2025-09-30 22:32:15.144093', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:15.202891', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.13461686670780182, 'timestamp': '2025-09-30 22:32:15.207096', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:15.265395', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.1791776418685913, 'timestamp': '2025-09-30 22:32:15.270637', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:15.329485', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.18105065822601318, 'timestamp': '2025-09-30 22:32:15.333558', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:15.392960', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.18489882349967957, 'timestamp': '2025-09-30 22:32:15.402402', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:15.461290', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.1449260711669922, 'timestamp': '2025-09-30 22:32:15.465748', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:15.524744', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.1778520941734314, 'timestamp': '2025-09-30 22:32:15.529169', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:15.587960', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.13951274752616882, 'timestamp': '2025-09-30 22:32:15.591913', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:15.663470', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.17186114192008972, 'timestamp': '2025-09-30 22:32:15.673139', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:15.732313', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.09237317740917206, 'timestamp': '2025-09-30 22:32:15.737412', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:15.797676', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.10267674922943115, 'timestamp': '2025-09-30 22:32:15.802971', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:15.877295', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.15636655688285828, 'timestamp': '2025-09-30 22:32:15.883115', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:15.942548', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.16992267966270447, 'timestamp': '2025-09-30 22:32:15.950422', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.008372', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.1092669889330864, 'timestamp': '2025-09-30 22:32:16.012231', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:16.072320', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.1029767319560051, 'timestamp': '2025-09-30 22:32:16.076083', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.148113', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.07008948922157288, 'timestamp': '2025-09-30 22:32:16.151748', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:16.213171', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.1427474021911621, 'timestamp': '2025-09-30 22:32:16.220392', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.287972', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.10115496814250946, 'timestamp': '2025-09-30 22:32:16.291001', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.361316', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.09280247241258621, 'timestamp': '2025-09-30 22:32:16.365482', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.426802', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.12144401669502258, 'timestamp': '2025-09-30 22:32:16.431847', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.500511', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.10588732361793518, 'timestamp': '2025-09-30 22:32:16.508227', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:16.565749', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.11260686069726944, 'timestamp': '2025-09-30 22:32:16.569585', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:16.627305', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.11631187051534653, 'timestamp': '2025-09-30 22:32:16.631697', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.692722', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.06597056984901428, 'timestamp': '2025-09-30 22:32:16.696345', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:16.756959', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.07784159481525421, 'timestamp': '2025-09-30 22:32:16.782335', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:16.869928', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.15308472514152527, 'timestamp': '2025-09-30 22:32:16.873360', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:16.934822', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.10625030100345612, 'timestamp': '2025-09-30 22:32:16.938162', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:17.015787', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.11355733126401901, 'timestamp': '2025-09-30 22:32:17.025646', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.090513', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.16789403557777405, 'timestamp': '2025-09-30 22:32:17.100064', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:17.166104', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.10837797820568085, 'timestamp': '2025-09-30 22:32:17.169351', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.231227', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.08548878133296967, 'timestamp': '2025-09-30 22:32:17.244812', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:17.328787', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.07126899808645248, 'timestamp': '2025-09-30 22:32:17.333751', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.391221', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.07444722205400467, 'timestamp': '2025-09-30 22:32:17.398451', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.469531', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.09964717924594879, 'timestamp': '2025-09-30 22:32:17.473660', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:17.533361', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.09084882587194443, 'timestamp': '2025-09-30 22:32:17.536847', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:17.597888', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.1684432178735733, 'timestamp': '2025-09-30 22:32:17.601290', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:17.661032', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.1098690927028656, 'timestamp': '2025-09-30 22:32:17.676182', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:17.741644', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.08509431779384613, 'timestamp': '2025-09-30 22:32:17.746966', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.816604', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.11120197921991348, 'timestamp': '2025-09-30 22:32:17.821401', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:17.879380', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.06930365413427353, 'timestamp': '2025-09-30 22:32:17.883787', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:17.948269', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.12069816887378693, 'timestamp': '2025-09-30 22:32:17.957151', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:18.016324', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.08943335711956024, 'timestamp': '2025-09-30 22:32:18.021023', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:18.085043', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.06534682959318161, 'timestamp': '2025-09-30 22:32:18.088054', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:18.145955', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.09518714994192123, 'timestamp': '2025-09-30 22:32:18.168093', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:18.251280', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.12273512035608292, 'timestamp': '2025-09-30 22:32:18.261819', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:18.321422', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.11209923774003983, 'timestamp': '2025-09-30 22:32:18.327870', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:18.399395', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.1848304569721222, 'timestamp': '2025-09-30 22:32:18.404775', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:18.464124', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.0988614410161972, 'timestamp': '2025-09-30 22:32:18.469424', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:18.532715', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.20721417665481567, 'timestamp': '2025-09-30 22:32:18.543801', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:18.603354', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.06418021768331528, 'timestamp': '2025-09-30 22:32:18.610061', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:18.670650', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.06272023916244507, 'timestamp': '2025-09-30 22:32:18.676225', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:18.740224', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.16715353727340698, 'timestamp': '2025-09-30 22:32:18.747623', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:18.823394', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.15729443728923798, 'timestamp': '2025-09-30 22:32:18.831639', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:18.889856', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.09882935136556625, 'timestamp': '2025-09-30 22:32:18.894288', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:18.953231', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.16884031891822815, 'timestamp': '2025-09-30 22:32:18.956195', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:19.021886', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.164426788687706, 'timestamp': '2025-09-30 22:32:19.027655', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:19.095285', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.08113862574100494, 'timestamp': '2025-09-30 22:32:19.103359', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.163228', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.09874911606311798, 'timestamp': '2025-09-30 22:32:19.166782', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:19.225080', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.1060367301106453, 'timestamp': '2025-09-30 22:32:19.228835', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:19.286523', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.10234268009662628, 'timestamp': '2025-09-30 22:32:19.291567', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:19.351175', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.06628517806529999, 'timestamp': '2025-09-30 22:32:19.357759', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.416352', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.07194048166275024, 'timestamp': '2025-09-30 22:32:19.420858', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.477464', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.10226484388113022, 'timestamp': '2025-09-30 22:32:19.481424', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.544339', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.0929364413022995, 'timestamp': '2025-09-30 22:32:19.548600', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:19.607323', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.07543491572141647, 'timestamp': '2025-09-30 22:32:19.621919', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:19.678901', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.06809227913618088, 'timestamp': '2025-09-30 22:32:19.682281', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:19.740372', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.09630563855171204, 'timestamp': '2025-09-30 22:32:19.745070', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.813426', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.12016567587852478, 'timestamp': '2025-09-30 22:32:19.818311', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:19.886459', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.18472933769226074, 'timestamp': '2025-09-30 22:32:19.894087', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:19.960744', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.08053454756736755, 'timestamp': '2025-09-30 22:32:19.965456', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:20.023711', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.0851902961730957, 'timestamp': '2025-09-30 22:32:20.026892', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.085213', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.15120194852352142, 'timestamp': '2025-09-30 22:32:20.089792', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.148391', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.2150728404521942, 'timestamp': '2025-09-30 22:32:20.155057', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:20.212049', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.11367158591747284, 'timestamp': '2025-09-30 22:32:20.220619', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:20.289810', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.12523536384105682, 'timestamp': '2025-09-30 22:32:20.294632', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.360203', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.07005612552165985, 'timestamp': '2025-09-30 22:32:20.363638', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:20.428313', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.06694352626800537, 'timestamp': '2025-09-30 22:32:20.434686', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.491683', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.08830784261226654, 'timestamp': '2025-09-30 22:32:20.499240', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.559555', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.18588361144065857, 'timestamp': '2025-09-30 22:32:20.577566', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.635516', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.06096376106142998, 'timestamp': '2025-09-30 22:32:20.638843', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:20.697477', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.14393304288387299, 'timestamp': '2025-09-30 22:32:20.704552', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:20.764789', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.1474732756614685, 'timestamp': '2025-09-30 22:32:20.768504', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:20.826862', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.06949921697378159, 'timestamp': '2025-09-30 22:32:20.831573', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:20.902678', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.06557471305131912, 'timestamp': '2025-09-30 22:32:20.906440', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:20.965515', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.18351244926452637, 'timestamp': '2025-09-30 22:32:20.983457', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:21.041236', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.06333227455615997, 'timestamp': '2025-09-30 22:32:21.047628', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:21.105925', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.07534748315811157, 'timestamp': '2025-09-30 22:32:21.110251', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:21.182327', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.09184128791093826, 'timestamp': '2025-09-30 22:32:21.186127', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.243982', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.10316916555166245, 'timestamp': '2025-09-30 22:32:21.250958', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:21.311398', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.1278824806213379, 'timestamp': '2025-09-30 22:32:21.315498', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:21.374961', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.11405811458826065, 'timestamp': '2025-09-30 22:32:21.378098', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.435860', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.17773327231407166, 'timestamp': '2025-09-30 22:32:21.440198', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.498066', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.08847018331289291, 'timestamp': '2025-09-30 22:32:21.505590', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.575837', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.09812447428703308, 'timestamp': '2025-09-30 22:32:21.579125', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:21.637502', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.10189294070005417, 'timestamp': '2025-09-30 22:32:21.640351', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.705302', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.10909111052751541, 'timestamp': '2025-09-30 22:32:21.709363', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:21.767483', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.10591556131839752, 'timestamp': '2025-09-30 22:32:21.775959', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:21.836060', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.1725309193134308, 'timestamp': '2025-09-30 22:32:21.839392', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:21.908440', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.13666625320911407, 'timestamp': '2025-09-30 22:32:21.913580', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:21.972700', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.11446546018123627, 'timestamp': '2025-09-30 22:32:21.977476', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:22.036194', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.11224763840436935, 'timestamp': '2025-09-30 22:32:22.043087', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:22.110632', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.09254730492830276, 'timestamp': '2025-09-30 22:32:22.115399', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.180224', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.12861879169940948, 'timestamp': '2025-09-30 22:32:22.183834', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.241473', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.07179182022809982, 'timestamp': '2025-09-30 22:32:22.252534', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:22.310246', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.07121509313583374, 'timestamp': '2025-09-30 22:32:22.316904', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.379471', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.13994957506656647, 'timestamp': '2025-09-30 22:32:22.382742', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:22.470201', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.12542501091957092, 'timestamp': '2025-09-30 22:32:22.473817', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:22.531447', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.20735102891921997, 'timestamp': '2025-09-30 22:32:22.534834', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:22.598297', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.07673534750938416, 'timestamp': '2025-09-30 22:32:22.607525', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:22.674862', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.11648079752922058, 'timestamp': '2025-09-30 22:32:22.679285', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.738444', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.21168123185634613, 'timestamp': '2025-09-30 22:32:22.744574', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.803307', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.12875054776668549, 'timestamp': '2025-09-30 22:32:22.806160', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:22.876473', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.09401948004961014, 'timestamp': '2025-09-30 22:32:22.884261', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:22.950809', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.09366113692522049, 'timestamp': '2025-09-30 22:32:22.963625', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:23.040223', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.22982239723205566, 'timestamp': '2025-09-30 22:32:23.054442', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:23.125232', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.08789592981338501, 'timestamp': '2025-09-30 22:32:23.128346', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.200705', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.15278005599975586, 'timestamp': '2025-09-30 22:32:23.209509', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:23.269824', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.07846726477146149, 'timestamp': '2025-09-30 22:32:23.281544', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:23.342052', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.09973820298910141, 'timestamp': '2025-09-30 22:32:23.345538', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.403945', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.08505699038505554, 'timestamp': '2025-09-30 22:32:23.408005', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:23.468501', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.05137697607278824, 'timestamp': '2025-09-30 22:32:23.475475', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.541816', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.0629836842417717, 'timestamp': '2025-09-30 22:32:23.545763', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.604169', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.1333824247121811, 'timestamp': '2025-09-30 22:32:23.609227', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:23.686429', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.11723139137029648, 'timestamp': '2025-09-30 22:32:23.692686', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.769506', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.17126265168190002, 'timestamp': '2025-09-30 22:32:23.779422', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:23.844768', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.06702427566051483, 'timestamp': '2025-09-30 22:32:23.848185', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:23.907830', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.19567392766475677, 'timestamp': '2025-09-30 22:32:23.910536', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:23.969064', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.11815999448299408, 'timestamp': '2025-09-30 22:32:23.972900', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:24.040176', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.08285166323184967, 'timestamp': '2025-09-30 22:32:24.047230', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:24.108938', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.14320451021194458, 'timestamp': '2025-09-30 22:32:24.113143', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:24.180830', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.14237840473651886, 'timestamp': '2025-09-30 22:32:24.184636', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:24.254628', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.07595059275627136, 'timestamp': '2025-09-30 22:32:24.265898', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:24.328611', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.07607608288526535, 'timestamp': '2025-09-30 22:32:24.335784', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:24.394787', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.2089022994041443, 'timestamp': '2025-09-30 22:32:24.406926', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:24.467054', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.15094205737113953, 'timestamp': '2025-09-30 22:32:24.470716', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:24.533506', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.12542161345481873, 'timestamp': '2025-09-30 22:32:24.538028', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:24.597861', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.08096666634082794, 'timestamp': '2025-09-30 22:32:24.606485', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:24.664813', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.18118689954280853, 'timestamp': '2025-09-30 22:32:24.668229', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:24.729898', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.05256607010960579, 'timestamp': '2025-09-30 22:32:24.735388', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:24.793966', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.08902869373559952, 'timestamp': '2025-09-30 22:32:24.798910', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:24.874379', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.14841145277023315, 'timestamp': '2025-09-30 22:32:24.883088', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:24.941749', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.07830537110567093, 'timestamp': '2025-09-30 22:32:24.946230', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:25.004606', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.22644108533859253, 'timestamp': '2025-09-30 22:32:25.019369', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:25.077528', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.10988114029169083, 'timestamp': '2025-09-30 22:32:25.081921', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.140061', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.11688639968633652, 'timestamp': '2025-09-30 22:32:25.147971', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.218809', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.07316816598176956, 'timestamp': '2025-09-30 22:32:25.222341', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:25.279745', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.12239319831132889, 'timestamp': '2025-09-30 22:32:25.289776', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:25.348874', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.03135054558515549, 'timestamp': '2025-09-30 22:32:25.362063', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.431165', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.07777507603168488, 'timestamp': '2025-09-30 22:32:25.438179', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.504791', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.13502515852451324, 'timestamp': '2025-09-30 22:32:25.511053', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:25.580327', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.0821661725640297, 'timestamp': '2025-09-30 22:32:25.584468', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:25.641857', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.15058085322380066, 'timestamp': '2025-09-30 22:32:25.646672', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.705836', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.17840859293937683, 'timestamp': '2025-09-30 22:32:25.713287', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.771357', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.09952694922685623, 'timestamp': '2025-09-30 22:32:25.775071', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:25.840644', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.06699579209089279, 'timestamp': '2025-09-30 22:32:25.856362', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:25.914175', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.12104300409555435, 'timestamp': '2025-09-30 22:32:25.929010', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:25.988963', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.14471252262592316, 'timestamp': '2025-09-30 22:32:25.995761', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:26.054936', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.13811762630939484, 'timestamp': '2025-09-30 22:32:26.059924', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:26.118651', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.2048061639070511, 'timestamp': '2025-09-30 22:32:26.122148', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.181138', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.01302166935056448, 'timestamp': '2025-09-30 22:32:26.184423', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.242048', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.09787347912788391, 'timestamp': '2025-09-30 22:32:26.249425', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:26.307436', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.13847337663173676, 'timestamp': '2025-09-30 22:32:26.321998', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:26.381998', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.1542830467224121, 'timestamp': '2025-09-30 22:32:26.387943', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:26.445452', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.17534123361110687, 'timestamp': '2025-09-30 22:32:26.451220', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.508998', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.11657942831516266, 'timestamp': '2025-09-30 22:32:26.517500', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.575286', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.12888853251934052, 'timestamp': '2025-09-30 22:32:26.578964', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.636566', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.13836395740509033, 'timestamp': '2025-09-30 22:32:26.642727', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:26.702651', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.11582046002149582, 'timestamp': '2025-09-30 22:32:26.707264', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:26.771061', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.07399804145097733, 'timestamp': '2025-09-30 22:32:26.788014', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:26.847983', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.15874089300632477, 'timestamp': '2025-09-30 22:32:26.855805', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:26.914344', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.08035668730735779, 'timestamp': '2025-09-30 22:32:26.926253', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:26.986084', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.08816663920879364, 'timestamp': '2025-09-30 22:32:26.990302', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:27.050004', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.048578035086393356, 'timestamp': '2025-09-30 22:32:27.057367', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:27.121139', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.12185467034578323, 'timestamp': '2025-09-30 22:32:27.125567', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:27.183963', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.14296378195285797, 'timestamp': '2025-09-30 22:32:27.186984', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:27.253401', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.11883772164583206, 'timestamp': '2025-09-30 22:32:27.269098', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:27.330656', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.07506727427244186, 'timestamp': '2025-09-30 22:32:27.337446', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:27.396140', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.11778086423873901, 'timestamp': '2025-09-30 22:32:27.400587', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:27.459659', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.07572253048419952, 'timestamp': '2025-09-30 22:32:27.462993', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:27.529949', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.11739789694547653, 'timestamp': '2025-09-30 22:32:27.533756', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:27.605244', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.09543925523757935, 'timestamp': '2025-09-30 22:32:27.613296', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:27.672406', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.07419118285179138, 'timestamp': '2025-09-30 22:32:27.676461', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:32:27.746368', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.16694746911525726, 'timestamp': '2025-09-30 22:32:27.749861', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:27.807514', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.10685575753450394, 'timestamp': '2025-09-30 22:32:27.810142', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:27.869896', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.09998980909585953, 'timestamp': '2025-09-30 22:32:27.876986', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:27.971098', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.06701143085956573, 'timestamp': '2025-09-30 22:32:27.976147', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:28.035265', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.06968081742525101, 'timestamp': '2025-09-30 22:32:28.037801', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:28.095291', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.10968199372291565, 'timestamp': '2025-09-30 22:32:28.101841', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:28.159945', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.20141537487506866, 'timestamp': '2025-09-30 22:32:28.166653', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:28.223708', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.114963598549366, 'timestamp': '2025-09-30 22:32:28.227172', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:28.285312', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.20461530983448029, 'timestamp': '2025-09-30 22:32:28.288821', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:32:28.346390', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.15975648164749146, 'timestamp': '2025-09-30 22:32:28.349047', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:28.406699', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.13807061314582825, 'timestamp': '2025-09-30 22:32:28.414210', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:28.473132', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.08056371659040451, 'timestamp': '2025-09-30 22:32:28.483922', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:28.541395', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.14269207417964935, 'timestamp': '2025-09-30 22:32:28.546672', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:28.604997', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.17378631234169006, 'timestamp': '2025-09-30 22:32:28.609704', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:28.667400', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.11939957737922668, 'timestamp': '2025-09-30 22:32:28.675543', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-09-30 22:32:29.172456', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:29.233841', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.06803259253501892, 'timestamp': '2025-09-30 22:32:29.238103', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:29.302535', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.1211807131767273, 'timestamp': '2025-09-30 22:32:29.306950', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:29.366112', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.106468066573143, 'timestamp': '2025-09-30 22:32:29.370022', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:29.429067', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.1266094446182251, 'timestamp': '2025-09-30 22:32:29.436559', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:29.494540', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.14133726060390472, 'timestamp': '2025-09-30 22:32:29.498095', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:29.557422', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.15748123824596405, 'timestamp': '2025-09-30 22:32:29.561360', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:29.631809', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.09230536967515945, 'timestamp': '2025-09-30 22:32:29.637027', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:29.696240', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.15704931318759918, 'timestamp': '2025-09-30 22:32:29.707516', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:29.768092', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.15491314232349396, 'timestamp': '2025-09-30 22:32:29.771911', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:29.830367', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.09809006005525589, 'timestamp': '2025-09-30 22:32:29.834746', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:29.893777', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.2211882323026657, 'timestamp': '2025-09-30 22:32:29.906587', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:29.964502', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.21951621770858765, 'timestamp': '2025-09-30 22:32:29.972176', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:30.037399', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.17629705369472504, 'timestamp': '2025-09-30 22:32:30.041323', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:30.099412', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.1448158323764801, 'timestamp': '2025-09-30 22:32:30.104166', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:30.164432', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.07974503189325333, 'timestamp': '2025-09-30 22:32:30.167404', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:30.225473', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.08440472185611725, 'timestamp': '2025-09-30 22:32:30.237185', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:30.295565', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.16454046964645386, 'timestamp': '2025-09-30 22:32:30.300546', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:30.373731', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.11940935254096985, 'timestamp': '2025-09-30 22:32:30.378165', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:30.437017', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.0970936045050621, 'timestamp': '2025-09-30 22:32:30.441218', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:30.516699', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.06842726469039917, 'timestamp': '2025-09-30 22:32:30.523354', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:30.581524', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.080329030752182, 'timestamp': '2025-09-30 22:32:30.585673', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:30.645753', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.0749916136264801, 'timestamp': '2025-09-30 22:32:30.649546', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:30.708967', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.16953957080841064, 'timestamp': '2025-09-30 22:32:30.713335', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:32:30.772168', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.09620711952447891, 'timestamp': '2025-09-30 22:32:30.779547', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:30.837842', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.1220424622297287, 'timestamp': '2025-09-30 22:32:30.842476', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:30.904184', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.27394264936447144, 'timestamp': '2025-09-30 22:32:30.919019', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:30.979057', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.10172710567712784, 'timestamp': '2025-09-30 22:32:31.001862', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:31.069306', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.11006706207990646, 'timestamp': '2025-09-30 22:32:31.077908', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:31.141454', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.17672128975391388, 'timestamp': '2025-09-30 22:32:31.156848', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:31.222478', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.13564667105674744, 'timestamp': '2025-09-30 22:32:31.225701', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:31.285565', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.0943329781293869, 'timestamp': '2025-09-30 22:32:31.290309', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:31.356259', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.11083698272705078, 'timestamp': '2025-09-30 22:32:31.362493', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:31.421605', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.024870261549949646, 'timestamp': '2025-09-30 22:32:31.425836', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:31.486942', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.11897458136081696, 'timestamp': '2025-09-30 22:32:31.489507', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:31.551157', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.09538214653730392, 'timestamp': '2025-09-30 22:32:31.553743', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:31.614832', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.040576476603746414, 'timestamp': '2025-09-30 22:32:31.621204', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:31.679632', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.14528889954090118, 'timestamp': '2025-09-30 22:32:31.682505', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:31.741653', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.10257671773433685, 'timestamp': '2025-09-30 22:32:31.751260', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:31.816058', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.1199692040681839, 'timestamp': '2025-09-30 22:32:31.820953', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:31.883783', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.16951292753219604, 'timestamp': '2025-09-30 22:32:31.895793', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:31.954940', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.07964964210987091, 'timestamp': '2025-09-30 22:32:31.958056', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:32.016878', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.09031029045581818, 'timestamp': '2025-09-30 22:32:32.020308', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:32.079249', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.093025341629982, 'timestamp': '2025-09-30 22:32:32.090259', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:32.148861', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.15848688781261444, 'timestamp': '2025-09-30 22:32:32.156800', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:32.215639', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.21023137867450714, 'timestamp': '2025-09-30 22:32:32.219072', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:32.286876', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.03443819656968117, 'timestamp': '2025-09-30 22:32:32.290019', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:32.348675', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.05846409127116203, 'timestamp': '2025-09-30 22:32:32.351338', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:32.427464', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.06493545323610306, 'timestamp': '2025-09-30 22:32:32.436476', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:32:46.740616', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 9542.479954688217, 'timestamp': '2025-09-30 22:32:46.759348', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:46.816995', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.10613556951284409, 'timestamp': '2025-09-30 22:32:46.820813', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:46.880458', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.08179983496665955, 'timestamp': '2025-09-30 22:32:46.884337', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:46.945162', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.14340151846408844, 'timestamp': '2025-09-30 22:32:46.949803', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:47.009011', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.0549316368997097, 'timestamp': '2025-09-30 22:32:47.015408', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:47.072151', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.10764723271131516, 'timestamp': '2025-09-30 22:32:47.074927', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.132996', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.10493046045303345, 'timestamp': '2025-09-30 22:32:47.136531', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:47.194615', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.1262264847755432, 'timestamp': '2025-09-30 22:32:47.198861', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:47.258379', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.13148029148578644, 'timestamp': '2025-09-30 22:32:47.267691', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.325257', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.26722073554992676, 'timestamp': '2025-09-30 22:32:47.330520', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.387787', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.11142884939908981, 'timestamp': '2025-09-30 22:32:47.391841', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:47.452617', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.1357160061597824, 'timestamp': '2025-09-30 22:32:47.455483', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:47.513141', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.108523890376091, 'timestamp': '2025-09-30 22:32:47.519784', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.575779', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.16803939640522003, 'timestamp': '2025-09-30 22:32:47.578638', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:47.636195', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.12171661108732224, 'timestamp': '2025-09-30 22:32:47.646445', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.705179', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.1525058150291443, 'timestamp': '2025-09-30 22:32:47.707766', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.764580', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.12486862391233444, 'timestamp': '2025-09-30 22:32:47.770990', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:47.828677', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.10671590268611908, 'timestamp': '2025-09-30 22:32:47.830962', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:47.888039', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.04507659003138542, 'timestamp': '2025-09-30 22:32:47.890497', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:47.947783', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.127610445022583, 'timestamp': '2025-09-30 22:32:47.957435', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:48.017660', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.14978502690792084, 'timestamp': '2025-09-30 22:32:48.026102', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.083793', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.07760772854089737, 'timestamp': '2025-09-30 22:32:48.086287', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:48.142994', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.08156832307577133, 'timestamp': '2025-09-30 22:32:48.145408', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.202381', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.07378488779067993, 'timestamp': '2025-09-30 22:32:48.204735', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:48.263932', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.09331710636615753, 'timestamp': '2025-09-30 22:32:48.269894', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:48.325692', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.13407577574253082, 'timestamp': '2025-09-30 22:32:48.327955', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.385073', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.15492671728134155, 'timestamp': '2025-09-30 22:32:48.395333', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.462923', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.11085113883018494, 'timestamp': '2025-09-30 22:32:48.475396', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:48.541796', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.07550526410341263, 'timestamp': '2025-09-30 22:32:48.555552', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.612773', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.0993955060839653, 'timestamp': '2025-09-30 22:32:48.615987', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.674609', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.1383700668811798, 'timestamp': '2025-09-30 22:32:48.677063', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:48.734375', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.08486541360616684, 'timestamp': '2025-09-30 22:32:48.737966', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:48.827672', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.14391668140888214, 'timestamp': '2025-09-30 22:32:48.837965', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:48.946771', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.1748402714729309, 'timestamp': '2025-09-30 22:32:48.949641', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:49.036286', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.1450996845960617, 'timestamp': '2025-09-30 22:32:49.041773', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:49.121905', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.12759730219841003, 'timestamp': '2025-09-30 22:32:49.124146', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:49.212274', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.15103575587272644, 'timestamp': '2025-09-30 22:32:49.218220', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.298942', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.17197231948375702, 'timestamp': '2025-09-30 22:32:49.300874', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.422299', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.20167036354541779, 'timestamp': '2025-09-30 22:32:49.426210', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.503351', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.2433283030986786, 'timestamp': '2025-09-30 22:32:49.508442', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.612199', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.11849728971719742, 'timestamp': '2025-09-30 22:32:49.618239', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.696183', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.13814057409763336, 'timestamp': '2025-09-30 22:32:49.700765', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:49.773408', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.15235397219657898, 'timestamp': '2025-09-30 22:32:49.775826', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:49.835731', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.11898598819971085, 'timestamp': '2025-09-30 22:32:49.838633', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:49.921391', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.12697602808475494, 'timestamp': '2025-09-30 22:32:49.929424', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:49.988362', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.16390515863895416, 'timestamp': '2025-09-30 22:32:49.991699', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:50.078557', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.12046722322702408, 'timestamp': '2025-09-30 22:32:50.080985', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.163839', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.15642043948173523, 'timestamp': '2025-09-30 22:32:50.170822', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:50.244774', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.16499736905097961, 'timestamp': '2025-09-30 22:32:50.251061', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:50.323130', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.10239452123641968, 'timestamp': '2025-09-30 22:32:50.325445', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:50.384518', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.07957333326339722, 'timestamp': '2025-09-30 22:32:50.386733', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.449509', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.16262662410736084, 'timestamp': '2025-09-30 22:32:50.452121', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:50.517556', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.09729500114917755, 'timestamp': '2025-09-30 22:32:50.523763', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.583553', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.09548618644475937, 'timestamp': '2025-09-30 22:32:50.586365', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:50.644367', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.22973155975341797, 'timestamp': '2025-09-30 22:32:50.646579', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:50.703358', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.15165071189403534, 'timestamp': '2025-09-30 22:32:50.705743', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.763079', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.14602221548557281, 'timestamp': '2025-09-30 22:32:50.768974', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.831248', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.1304263472557068, 'timestamp': '2025-09-30 22:32:50.833591', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:50.891188', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.15934151411056519, 'timestamp': '2025-09-30 22:32:50.893338', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:50.951651', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.07027623057365417, 'timestamp': '2025-09-30 22:32:50.953993', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:51.015980', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.1597442477941513, 'timestamp': '2025-09-30 22:32:51.024309', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:51.092651', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.11813145875930786, 'timestamp': '2025-09-30 22:32:51.095557', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:51.153804', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.10715239495038986, 'timestamp': '2025-09-30 22:32:51.156227', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:51.221756', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.0809844508767128, 'timestamp': '2025-09-30 22:32:51.223894', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:51.282361', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.01911093108355999, 'timestamp': '2025-09-30 22:32:51.288381', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:51.344353', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.09193141758441925, 'timestamp': '2025-09-30 22:32:51.346678', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:51.403118', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.16236810386180878, 'timestamp': '2025-09-30 22:32:51.406452', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:51.462896', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.11723935604095459, 'timestamp': '2025-09-30 22:32:51.466389', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:51.525134', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.17261946201324463, 'timestamp': '2025-09-30 22:32:51.531066', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:51.587658', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.09023163467645645, 'timestamp': '2025-09-30 22:32:51.589752', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:51.662950', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.21255312860012054, 'timestamp': '2025-09-30 22:32:51.665173', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:51.722534', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.08540907502174377, 'timestamp': '2025-09-30 22:32:51.724859', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:51.785645', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.07991503179073334, 'timestamp': '2025-09-30 22:32:51.791747', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:51.848768', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.10195271670818329, 'timestamp': '2025-09-30 22:32:51.850814', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:51.907555', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.15479159355163574, 'timestamp': '2025-09-30 22:32:51.912276', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:51.971651', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.07331832498311996, 'timestamp': '2025-09-30 22:32:51.975767', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:52.032762', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.08848675340414047, 'timestamp': '2025-09-30 22:32:52.038509', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.094670', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.10031943023204803, 'timestamp': '2025-09-30 22:32:52.097993', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:52.155548', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.03885447606444359, 'timestamp': '2025-09-30 22:32:52.157933', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.215217', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.0843016654253006, 'timestamp': '2025-09-30 22:32:52.217715', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:52.274335', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.11567841470241547, 'timestamp': '2025-09-30 22:32:52.281619', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:32:52.339362', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.09494838863611221, 'timestamp': '2025-09-30 22:32:52.341646', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.398937', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.06708376109600067, 'timestamp': '2025-09-30 22:32:52.401655', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:52.459148', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.12026534974575043, 'timestamp': '2025-09-30 22:32:52.461377', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.517560', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.10344329476356506, 'timestamp': '2025-09-30 22:32:52.523572', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:52.591214', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.20199356973171234, 'timestamp': '2025-09-30 22:32:52.593770', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.650991', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.07614702731370926, 'timestamp': '2025-09-30 22:32:52.653320', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:52.710585', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.13614466786384583, 'timestamp': '2025-09-30 22:32:52.712809', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.769943', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.14726750552654266, 'timestamp': '2025-09-30 22:32:52.775782', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:52.831967', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.08687493205070496, 'timestamp': '2025-09-30 22:32:52.835518', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:52.892136', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.05440206080675125, 'timestamp': '2025-09-30 22:32:52.894670', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:52.951103', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.04900360852479935, 'timestamp': '2025-09-30 22:32:52.965659', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:53.024908', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.1080896183848381, 'timestamp': '2025-09-30 22:32:53.030758', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:53.087420', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.13671593368053436, 'timestamp': '2025-09-30 22:32:53.091159', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:53.166369', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.10698365420103073, 'timestamp': '2025-09-30 22:32:53.168637', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:53.225423', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.14539125561714172, 'timestamp': '2025-09-30 22:32:53.227750', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:53.284452', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.2725231349468231, 'timestamp': '2025-09-30 22:32:53.290641', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:53.347132', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.09828327596187592, 'timestamp': '2025-09-30 22:32:53.349450', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:53.406482', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.22720324993133545, 'timestamp': '2025-09-30 22:32:53.416756', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:53.480785', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.10321184247732162, 'timestamp': '2025-09-30 22:32:53.483908', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:53.542952', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.18612881004810333, 'timestamp': '2025-09-30 22:32:53.549506', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:53.605764', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.16683338582515717, 'timestamp': '2025-09-30 22:32:53.607949', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:53.668383', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.17271128296852112, 'timestamp': '2025-09-30 22:32:53.670713', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:53.729000', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.14786051213741302, 'timestamp': '2025-09-30 22:32:53.731808', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:53.788344', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.10978154838085175, 'timestamp': '2025-09-30 22:32:53.794570', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:53.850415', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.09734270721673965, 'timestamp': '2025-09-30 22:32:53.852937', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:53.909356', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.1874821037054062, 'timestamp': '2025-09-30 22:32:53.912316', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:53.974660', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.14905935525894165, 'timestamp': '2025-09-30 22:32:53.976839', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:54.033449', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.21647605299949646, 'timestamp': '2025-09-30 22:32:54.039841', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:54.108519', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.058627329766750336, 'timestamp': '2025-09-30 22:32:54.112881', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:54.171714', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.1511208564043045, 'timestamp': '2025-09-30 22:32:54.174141', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:54.232155', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.07819663733243942, 'timestamp': '2025-09-30 22:32:54.234726', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:54.291454', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.051975227892398834, 'timestamp': '2025-09-30 22:32:54.297622', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:54.353290', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.1344275325536728, 'timestamp': '2025-09-30 22:32:54.355999', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:54.412733', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.05843384936451912, 'timestamp': '2025-09-30 22:32:54.415046', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:54.473876', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.14643187820911407, 'timestamp': '2025-09-30 22:32:54.476144', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:54.532785', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.09519834071397781, 'timestamp': '2025-09-30 22:32:54.540415', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:54.601918', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.12605291604995728, 'timestamp': '2025-09-30 22:32:54.604400', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:54.687089', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.1124090626835823, 'timestamp': '2025-09-30 22:32:54.689940', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:54.747240', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.1080375388264656, 'timestamp': '2025-09-30 22:32:54.750317', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:54.807138', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.10742557793855667, 'timestamp': '2025-09-30 22:32:54.814325', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:54.882897', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.15180686116218567, 'timestamp': '2025-09-30 22:32:54.885619', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:54.942108', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.13258738815784454, 'timestamp': '2025-09-30 22:32:54.945147', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:55.002556', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.10230737924575806, 'timestamp': '2025-09-30 22:32:55.004934', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:55.061601', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.04763251543045044, 'timestamp': '2025-09-30 22:32:55.068034', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:55.125525', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.07325000315904617, 'timestamp': '2025-09-30 22:32:55.128786', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:55.188197', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.1285071074962616, 'timestamp': '2025-09-30 22:32:55.191239', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:55.248948', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.12700682878494263, 'timestamp': '2025-09-30 22:32:55.251655', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:55.308190', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.0958019495010376, 'timestamp': '2025-09-30 22:32:55.314131', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:55.371555', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.11773036420345306, 'timestamp': '2025-09-30 22:32:55.375138', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:55.432533', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.10847535729408264, 'timestamp': '2025-09-30 22:32:55.435520', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:55.510456', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.0843028724193573, 'timestamp': '2025-09-30 22:32:55.513471', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:55.571891', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.18063825368881226, 'timestamp': '2025-09-30 22:32:55.577883', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:55.636015', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.06284285336732864, 'timestamp': '2025-09-30 22:32:55.639034', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:55.696815', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.1288483589887619, 'timestamp': '2025-09-30 22:32:55.699209', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:55.758147', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.125240758061409, 'timestamp': '2025-09-30 22:32:55.760524', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:55.817436', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.04565947875380516, 'timestamp': '2025-09-30 22:32:55.824150', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:55.896063', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.08341578394174576, 'timestamp': '2025-09-30 22:32:55.899556', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:55.957515', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.1668008416891098, 'timestamp': '2025-09-30 22:32:55.961518', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:56.018276', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.0938703715801239, 'timestamp': '2025-09-30 22:32:56.023964', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:56.091383', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.08064571022987366, 'timestamp': '2025-09-30 22:32:56.102415', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:56.161238', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.102317214012146, 'timestamp': '2025-09-30 22:32:56.167261', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:56.226741', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.15035389363765717, 'timestamp': '2025-09-30 22:32:56.229265', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:56.287125', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.08412858098745346, 'timestamp': '2025-09-30 22:32:56.290671', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:56.349593', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.07376616448163986, 'timestamp': '2025-09-30 22:32:56.356833', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:56.416109', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.1006951704621315, 'timestamp': '2025-09-30 22:32:56.418387', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:56.482161', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.07274988293647766, 'timestamp': '2025-09-30 22:32:56.485101', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:56.543306', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.08635037392377853, 'timestamp': '2025-09-30 22:32:56.546539', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:56.603481', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.11846810579299927, 'timestamp': '2025-09-30 22:32:56.610727', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:56.667428', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.13362203538417816, 'timestamp': '2025-09-30 22:32:56.671057', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:56.728427', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.08403399586677551, 'timestamp': '2025-09-30 22:32:56.731757', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:56.790466', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.11400392651557922, 'timestamp': '2025-09-30 22:32:56.793742', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:56.850593', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.12508663535118103, 'timestamp': '2025-09-30 22:32:56.857189', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:56.912668', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.07850036770105362, 'timestamp': '2025-09-30 22:32:56.915845', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:56.973947', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.12021785974502563, 'timestamp': '2025-09-30 22:32:56.977586', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.033989', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.09938646107912064, 'timestamp': '2025-09-30 22:32:57.037680', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:57.093393', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.10931593179702759, 'timestamp': '2025-09-30 22:32:57.099597', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.155908', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.07647478580474854, 'timestamp': '2025-09-30 22:32:57.158635', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.217307', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.17057737708091736, 'timestamp': '2025-09-30 22:32:57.221451', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.277979', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.08981678634881973, 'timestamp': '2025-09-30 22:32:57.282000', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:57.339640', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.08888893574476242, 'timestamp': '2025-09-30 22:32:57.349963', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.407528', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.16723795235157013, 'timestamp': '2025-09-30 22:32:57.412422', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.473369', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.11396122723817825, 'timestamp': '2025-09-30 22:32:57.476054', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.543244', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.11997603625059128, 'timestamp': '2025-09-30 22:32:57.549687', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.606643', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.04882190003991127, 'timestamp': '2025-09-30 22:32:57.613121', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:57.670528', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.05010480806231499, 'timestamp': '2025-09-30 22:32:57.674417', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:57.731690', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.06851644814014435, 'timestamp': '2025-09-30 22:32:57.734059', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:57.790169', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.1214342787861824, 'timestamp': '2025-09-30 22:32:57.792383', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:57.848970', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.09166383743286133, 'timestamp': '2025-09-30 22:32:57.855793', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:57.912300', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.12598708271980286, 'timestamp': '2025-09-30 22:32:57.915805', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:57.985853', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.17843985557556152, 'timestamp': '2025-09-30 22:32:57.988169', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:58.046119', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.10828561335802078, 'timestamp': '2025-09-30 22:32:58.048395', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.107789', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.09100829064846039, 'timestamp': '2025-09-30 22:32:58.114604', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.170609', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.10897119343280792, 'timestamp': '2025-09-30 22:32:58.176852', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:58.237795', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.11818656325340271, 'timestamp': '2025-09-30 22:32:58.239903', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.296537', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.040384501218795776, 'timestamp': '2025-09-30 22:32:58.298851', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.355462', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.09232880920171738, 'timestamp': '2025-09-30 22:32:58.362697', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.418600', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.229567289352417, 'timestamp': '2025-09-30 22:32:58.420987', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:32:58.477667', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.15107077360153198, 'timestamp': '2025-09-30 22:32:58.479786', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:58.536556', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.1385725885629654, 'timestamp': '2025-09-30 22:32:58.538905', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:58.595441', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.13644535839557648, 'timestamp': '2025-09-30 22:32:58.601921', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:58.657884', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.11659173667430878, 'timestamp': '2025-09-30 22:32:58.660192', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:32:58.717521', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.08516962081193924, 'timestamp': '2025-09-30 22:32:58.720039', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:58.778322', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.20114384591579437, 'timestamp': '2025-09-30 22:32:58.780621', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.838155', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.2290557473897934, 'timestamp': '2025-09-30 22:32:58.843964', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:58.899652', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.10148929804563522, 'timestamp': '2025-09-30 22:32:58.902591', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:58.958445', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.075857013463974, 'timestamp': '2025-09-30 22:32:58.960840', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:59.029420', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.1622467190027237, 'timestamp': '2025-09-30 22:32:59.031958', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:59.092410', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.10507582873106003, 'timestamp': '2025-09-30 22:32:59.099599', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:59.174103', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.08965320140123367, 'timestamp': '2025-09-30 22:32:59.176536', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:59.243971', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.11520853638648987, 'timestamp': '2025-09-30 22:32:59.246425', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:59.302894', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.186559796333313, 'timestamp': '2025-09-30 22:32:59.305158', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:32:59.361612', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.19317910075187683, 'timestamp': '2025-09-30 22:32:59.367464', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:59.423739', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.1283768266439438, 'timestamp': '2025-09-30 22:32:59.427478', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.484108', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.08288918435573578, 'timestamp': '2025-09-30 22:32:59.486348', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.542587', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.17522279918193817, 'timestamp': '2025-09-30 22:32:59.544917', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:59.601485', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.08815182000398636, 'timestamp': '2025-09-30 22:32:59.607409', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:32:59.663230', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.1828223615884781, 'timestamp': '2025-09-30 22:32:59.665510', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:32:59.724142', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.1860043704509735, 'timestamp': '2025-09-30 22:32:59.726789', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.783927', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.14742927253246307, 'timestamp': '2025-09-30 22:32:59.786564', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.844529', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.08312302827835083, 'timestamp': '2025-09-30 22:32:59.850801', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.906876', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.11586830019950867, 'timestamp': '2025-09-30 22:32:59.909522', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:32:59.965961', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.15709218382835388, 'timestamp': '2025-09-30 22:32:59.968925', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:00.026087', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.12273158133029938, 'timestamp': '2025-09-30 22:33:00.028650', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:00.085719', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.09593997895717621, 'timestamp': '2025-09-30 22:33:00.091677', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:00.149980', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.13727308809757233, 'timestamp': '2025-09-30 22:33:00.154964', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:00.213989', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.13746236264705658, 'timestamp': '2025-09-30 22:33:00.216224', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.273436', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.1172659620642662, 'timestamp': '2025-09-30 22:33:00.275782', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:00.333267', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.14295944571495056, 'timestamp': '2025-09-30 22:33:00.339328', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.413065', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.11036457866430283, 'timestamp': '2025-09-30 22:33:00.415290', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:00.472875', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.06873068958520889, 'timestamp': '2025-09-30 22:33:00.475170', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.533375', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.0760037750005722, 'timestamp': '2025-09-30 22:33:00.535797', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:00.592624', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.07513654977083206, 'timestamp': '2025-09-30 22:33:00.599099', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.654999', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.2477407455444336, 'timestamp': '2025-09-30 22:33:00.657194', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:00.714914', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.18104080855846405, 'timestamp': '2025-09-30 22:33:00.717512', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:00.775940', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.0225844644010067, 'timestamp': '2025-09-30 22:33:00.778155', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.839249', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.09951434284448624, 'timestamp': '2025-09-30 22:33:00.845148', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:00.901965', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.06910157948732376, 'timestamp': '2025-09-30 22:33:00.906128', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:00.963725', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.1672404259443283, 'timestamp': '2025-09-30 22:33:00.968348', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:01.027043', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.12136224657297134, 'timestamp': '2025-09-30 22:33:01.030696', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:01.089313', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.13195361196994781, 'timestamp': '2025-09-30 22:33:01.095023', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:01.151900', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.13345582783222198, 'timestamp': '2025-09-30 22:33:01.154805', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.211472', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.07198724895715714, 'timestamp': '2025-09-30 22:33:01.213885', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.270854', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.0443735234439373, 'timestamp': '2025-09-30 22:33:01.273233', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:01.334925', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.07590758055448532, 'timestamp': '2025-09-30 22:33:01.341035', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:01.405092', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.13464754819869995, 'timestamp': '2025-09-30 22:33:01.407539', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.465343', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.10520998388528824, 'timestamp': '2025-09-30 22:33:01.467907', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:01.528769', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.1379307210445404, 'timestamp': '2025-09-30 22:33:01.530940', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.587960', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.11899205297231674, 'timestamp': '2025-09-30 22:33:01.593906', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:01.652165', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.07272734493017197, 'timestamp': '2025-09-30 22:33:01.656655', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.716993', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.11732853949069977, 'timestamp': '2025-09-30 22:33:01.719126', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.787833', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.05409834906458855, 'timestamp': '2025-09-30 22:33:01.790181', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:01.850184', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.1638982892036438, 'timestamp': '2025-09-30 22:33:01.856286', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:01.912939', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.1087234616279602, 'timestamp': '2025-09-30 22:33:01.915461', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:01.982979', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.17784787714481354, 'timestamp': '2025-09-30 22:33:01.988577', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:02.061788', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.09748345613479614, 'timestamp': '2025-09-30 22:33:02.071504', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.161555', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.08614181727170944, 'timestamp': '2025-09-30 22:33:02.168828', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:02.229114', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.09729045629501343, 'timestamp': '2025-09-30 22:33:02.233182', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:02.294738', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.16509972512722015, 'timestamp': '2025-09-30 22:33:02.297102', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:02.354061', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.08183673024177551, 'timestamp': '2025-09-30 22:33:02.356504', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.415078', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.061270516365766525, 'timestamp': '2025-09-30 22:33:02.424994', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:02.518560', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.09379301965236664, 'timestamp': '2025-09-30 22:33:02.521049', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.589949', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.12097536772489548, 'timestamp': '2025-09-30 22:33:02.592027', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.655699', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.14954042434692383, 'timestamp': '2025-09-30 22:33:02.658061', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:02.719508', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.0943366289138794, 'timestamp': '2025-09-30 22:33:02.725737', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:33:02.784481', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.16127893328666687, 'timestamp': '2025-09-30 22:33:02.787133', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.878829', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.10885206609964371, 'timestamp': '2025-09-30 22:33:02.881025', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:02.938896', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.04316553846001625, 'timestamp': '2025-09-30 22:33:02.941178', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:03.008325', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.164291650056839, 'timestamp': '2025-09-30 22:33:03.015122', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.076093', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.0883433148264885, 'timestamp': '2025-09-30 22:33:03.078907', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.151542', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.09486087411642075, 'timestamp': '2025-09-30 22:33:03.153942', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.227611', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.09930522739887238, 'timestamp': '2025-09-30 22:33:03.230144', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:03.298336', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.1111646369099617, 'timestamp': '2025-09-30 22:33:03.304874', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:03.362109', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.0677030012011528, 'timestamp': '2025-09-30 22:33:03.364442', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.423266', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.15751737356185913, 'timestamp': '2025-09-30 22:33:03.426408', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:03.485366', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.07679902017116547, 'timestamp': '2025-09-30 22:33:03.489685', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.550666', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.09121356159448624, 'timestamp': '2025-09-30 22:33:03.556745', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:03.613106', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.1456071436405182, 'timestamp': '2025-09-30 22:33:03.621918', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:03.679146', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.15096238255500793, 'timestamp': '2025-09-30 22:33:03.681321', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:03.738171', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.17135381698608398, 'timestamp': '2025-09-30 22:33:03.740294', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:03.797176', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.11274594068527222, 'timestamp': '2025-09-30 22:33:03.802936', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:03.858107', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.10576704889535904, 'timestamp': '2025-09-30 22:33:03.860387', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:03.943527', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.1933494359254837, 'timestamp': '2025-09-30 22:33:03.946035', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:04.003106', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.15337131917476654, 'timestamp': '2025-09-30 22:33:04.005515', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:04.063851', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.09765654802322388, 'timestamp': '2025-09-30 22:33:04.069785', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:04.126133', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.17446304857730865, 'timestamp': '2025-09-30 22:33:04.129110', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:04.194929', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.07825928926467896, 'timestamp': '2025-09-30 22:33:04.197291', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:04.254319', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.14998464286327362, 'timestamp': '2025-09-30 22:33:04.256530', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:04.313402', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.10481614619493484, 'timestamp': '2025-09-30 22:33:04.319483', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:04.375100', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.14021974802017212, 'timestamp': '2025-09-30 22:33:04.378472', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:04.438854', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.13390076160430908, 'timestamp': '2025-09-30 22:33:04.441766', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:04.498820', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.13538199663162231, 'timestamp': '2025-09-30 22:33:04.500948', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:04.557202', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.20617222785949707, 'timestamp': '2025-09-30 22:33:04.563470', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:04.619391', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.12515884637832642, 'timestamp': '2025-09-30 22:33:04.622143', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:04.679203', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.11814314126968384, 'timestamp': '2025-09-30 22:33:04.681844', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:04.739122', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.104371078312397, 'timestamp': '2025-09-30 22:33:04.743507', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:04.807719', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.07926169782876968, 'timestamp': '2025-09-30 22:33:04.814373', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:04.872938', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.12631341814994812, 'timestamp': '2025-09-30 22:33:04.875671', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:04.932859', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.12548133730888367, 'timestamp': '2025-09-30 22:33:04.935720', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:04.992986', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.12057071924209595, 'timestamp': '2025-09-30 22:33:04.995677', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:05.053220', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.10590365529060364, 'timestamp': '2025-09-30 22:33:05.059459', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:05.116854', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.09934773296117783, 'timestamp': '2025-09-30 22:33:05.119049', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:05.182763', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.15858039259910583, 'timestamp': '2025-09-30 22:33:05.185108', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:05.250285', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.10963809490203857, 'timestamp': '2025-09-30 22:33:05.253050', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:05.309180', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.07900430262088776, 'timestamp': '2025-09-30 22:33:05.315140', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:05.373656', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.12275239825248718, 'timestamp': '2025-09-30 22:33:05.376748', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:05.441168', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.03242197260260582, 'timestamp': '2025-09-30 22:33:05.443393', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:05.510730', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.10773994773626328, 'timestamp': '2025-09-30 22:33:05.512897', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:05.571973', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.08629407733678818, 'timestamp': '2025-09-30 22:33:05.578058', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:05.634464', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.06932080537080765, 'timestamp': '2025-09-30 22:33:05.636771', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:05.693185', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.07027052342891693, 'timestamp': '2025-09-30 22:33:05.695682', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:05.757894', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.12413086742162704, 'timestamp': '2025-09-30 22:33:05.760477', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:05.817285', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.11937249451875687, 'timestamp': '2025-09-30 22:33:05.823764', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:05.881828', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.18022872507572174, 'timestamp': '2025-09-30 22:33:05.884910', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:05.942859', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.11756952106952667, 'timestamp': '2025-09-30 22:33:05.945424', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:06.014383', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.15229065716266632, 'timestamp': '2025-09-30 22:33:06.019818', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:06.075552', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.13292400538921356, 'timestamp': '2025-09-30 22:33:06.081507', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:06.137853', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.05359582230448723, 'timestamp': '2025-09-30 22:33:06.140214', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:06.206366', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.12525323033332825, 'timestamp': '2025-09-30 22:33:06.209781', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:06.266635', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.08543001115322113, 'timestamp': '2025-09-30 22:33:06.269856', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:06.336735', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.14288492500782013, 'timestamp': '2025-09-30 22:33:06.342603', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:06.398060', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.2556160092353821, 'timestamp': '2025-09-30 22:33:06.400083', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.456205', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.1074887216091156, 'timestamp': '2025-09-30 22:33:06.458743', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:06.515142', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.1489241123199463, 'timestamp': '2025-09-30 22:33:06.517520', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.575412', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.06988493353128433, 'timestamp': '2025-09-30 22:33:06.581538', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.647807', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.08081872761249542, 'timestamp': '2025-09-30 22:33:06.650255', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.706293', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.09353062510490417, 'timestamp': '2025-09-30 22:33:06.708743', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.765003', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.16045472025871277, 'timestamp': '2025-09-30 22:33:06.767279', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:06.824946', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.13993363082408905, 'timestamp': '2025-09-30 22:33:06.830683', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:06.887173', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.13578109443187714, 'timestamp': '2025-09-30 22:33:06.889882', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:06.946173', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.09353505820035934, 'timestamp': '2025-09-30 22:33:06.948543', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:07.005814', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.061892714351415634, 'timestamp': '2025-09-30 22:33:07.008154', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:07.064876', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.1501140296459198, 'timestamp': '2025-09-30 22:33:07.070725', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.130867', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.10401888936758041, 'timestamp': '2025-09-30 22:33:07.133166', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.189813', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.08448544144630432, 'timestamp': '2025-09-30 22:33:07.192114', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.248505', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.07870613783597946, 'timestamp': '2025-09-30 22:33:07.250753', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:07.307345', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.22396636009216309, 'timestamp': '2025-09-30 22:33:07.313338', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:07.369458', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.12418632954359055, 'timestamp': '2025-09-30 22:33:07.371575', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.427296', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.13609138131141663, 'timestamp': '2025-09-30 22:33:07.429650', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.486002', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.1781938672065735, 'timestamp': '2025-09-30 22:33:07.489006', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:07.546049', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.14959172904491425, 'timestamp': '2025-09-30 22:33:07.552330', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:07.608298', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.10026095807552338, 'timestamp': '2025-09-30 22:33:07.610859', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:07.667639', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.14684566855430603, 'timestamp': '2025-09-30 22:33:07.670557', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:07.728443', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.11746688187122345, 'timestamp': '2025-09-30 22:33:07.730849', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:07.787677', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.12118354439735413, 'timestamp': '2025-09-30 22:33:07.793787', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:07.851575', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.10178685933351517, 'timestamp': '2025-09-30 22:33:07.853872', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:07.912198', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.25399887561798096, 'timestamp': '2025-09-30 22:33:07.914770', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:07.972855', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.18017368018627167, 'timestamp': '2025-09-30 22:33:07.975431', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:08.035091', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.10665726661682129, 'timestamp': '2025-09-30 22:33:08.041399', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:08.098258', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.12301482260227203, 'timestamp': '2025-09-30 22:33:08.101772', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:08.159307', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.08582880347967148, 'timestamp': '2025-09-30 22:33:08.161636', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.218648', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.17265833914279938, 'timestamp': '2025-09-30 22:33:08.220982', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.278907', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.13850925862789154, 'timestamp': '2025-09-30 22:33:08.284805', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.341666', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.14021354913711548, 'timestamp': '2025-09-30 22:33:08.344572', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:08.403358', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.16042909026145935, 'timestamp': '2025-09-30 22:33:08.405739', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.465136', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.18400776386260986, 'timestamp': '2025-09-30 22:33:08.467479', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:08.525667', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.11833299696445465, 'timestamp': '2025-09-30 22:33:08.531578', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:08.589376', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.051454946398735046, 'timestamp': '2025-09-30 22:33:08.595846', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:08.655648', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.09545264393091202, 'timestamp': '2025-09-30 22:33:08.657741', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.714848', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.059911083430051804, 'timestamp': '2025-09-30 22:33:08.717153', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.775532', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.06541462987661362, 'timestamp': '2025-09-30 22:33:08.781278', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.837761', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.08438555151224136, 'timestamp': '2025-09-30 22:33:08.839959', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.898586', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.05279430001974106, 'timestamp': '2025-09-30 22:33:08.901157', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:08.958100', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.13460499048233032, 'timestamp': '2025-09-30 22:33:08.960216', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:09.017963', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.12159812450408936, 'timestamp': '2025-09-30 22:33:09.023784', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:09.081184', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.18169118463993073, 'timestamp': '2025-09-30 22:33:09.083746', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:09.142357', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.10062508285045624, 'timestamp': '2025-09-30 22:33:09.145127', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:09.202472', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.08269891887903214, 'timestamp': '2025-09-30 22:33:09.204853', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:09.263566', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.09781623631715775, 'timestamp': '2025-09-30 22:33:09.269449', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:09.327648', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.0958811417222023, 'timestamp': '2025-09-30 22:33:09.329997', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:09.387900', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.12560418248176575, 'timestamp': '2025-09-30 22:33:09.390701', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:09.447658', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.11724746972322464, 'timestamp': '2025-09-30 22:33:09.449750', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:09.517183', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.276266872882843, 'timestamp': '2025-09-30 22:33:09.523358', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:09.579546', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.1843162328004837, 'timestamp': '2025-09-30 22:33:09.581807', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:09.638184', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.10524796694517136, 'timestamp': '2025-09-30 22:33:09.640415', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:09.697403', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.04398451745510101, 'timestamp': '2025-09-30 22:33:09.699742', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:09.757720', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.09346567094326019, 'timestamp': '2025-09-30 22:33:09.763564', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:09.821400', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.20577916502952576, 'timestamp': '2025-09-30 22:33:09.827290', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:09.886522', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.08928505331277847, 'timestamp': '2025-09-30 22:33:09.888898', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:09.946399', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.10012936592102051, 'timestamp': '2025-09-30 22:33:09.948851', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.006193', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.15589042007923126, 'timestamp': '2025-09-30 22:33:10.012045', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.067786', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.14205701649188995, 'timestamp': '2025-09-30 22:33:10.070274', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:10.128195', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.19445931911468506, 'timestamp': '2025-09-30 22:33:10.130696', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:10.187504', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.10485689342021942, 'timestamp': '2025-09-30 22:33:10.189994', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:10.245795', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.14734460413455963, 'timestamp': '2025-09-30 22:33:10.251811', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:10.309581', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.12052310258150101, 'timestamp': '2025-09-30 22:33:10.311737', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:10.369717', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.0932449996471405, 'timestamp': '2025-09-30 22:33:10.372052', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:10.434956', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.1055501326918602, 'timestamp': '2025-09-30 22:33:10.437275', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.495073', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.06805703788995743, 'timestamp': '2025-09-30 22:33:10.501036', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.556664', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.1044624075293541, 'timestamp': '2025-09-30 22:33:10.559145', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.616717', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.12365962564945221, 'timestamp': '2025-09-30 22:33:10.620110', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:10.678461', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.15376624464988708, 'timestamp': '2025-09-30 22:33:10.681367', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:10.742159', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.0551772303879261, 'timestamp': '2025-09-30 22:33:10.747921', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:10.808285', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.16563370823860168, 'timestamp': '2025-09-30 22:33:10.810594', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:10.885642', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.122005894780159, 'timestamp': '2025-09-30 22:33:10.887663', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:10.962024', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.07781196385622025, 'timestamp': '2025-09-30 22:33:10.964386', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:11.020752', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.04242567718029022, 'timestamp': '2025-09-30 22:33:11.027973', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:11.084056', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.09430301934480667, 'timestamp': '2025-09-30 22:33:11.086742', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:11.156623', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.1077284961938858, 'timestamp': '2025-09-30 22:33:11.159273', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:11.216714', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.08314060419797897, 'timestamp': '2025-09-30 22:33:11.219300', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:11.276654', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.13853591680526733, 'timestamp': '2025-09-30 22:33:11.282861', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:11.359240', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.0764344334602356, 'timestamp': '2025-09-30 22:33:11.361379', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:11.418542', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.120153047144413, 'timestamp': '2025-09-30 22:33:11.421093', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:11.477978', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.07171221077442169, 'timestamp': '2025-09-30 22:33:11.480109', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:33:11.536664', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.09027649462223053, 'timestamp': '2025-09-30 22:33:11.542479', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:11.606080', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.13163959980010986, 'timestamp': '2025-09-30 22:33:11.608210', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:11.671506', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.10071702301502228, 'timestamp': '2025-09-30 22:33:11.674745', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:11.736714', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.16520656645298004, 'timestamp': '2025-09-30 22:33:11.739007', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:11.796571', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.11045172065496445, 'timestamp': '2025-09-30 22:33:11.802917', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:11.862081', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.11423508822917938, 'timestamp': '2025-09-30 22:33:11.864323', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:11.921262', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.12205031514167786, 'timestamp': '2025-09-30 22:33:11.923895', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:11.981078', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.19963014125823975, 'timestamp': '2025-09-30 22:33:11.983364', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:33:12.049175', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.12133944779634476, 'timestamp': '2025-09-30 22:33:12.055155', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:12.110892', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.13830678164958954, 'timestamp': '2025-09-30 22:33:12.113126', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.169631', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.1555670201778412, 'timestamp': '2025-09-30 22:33:12.171916', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:12.230155', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.13713960349559784, 'timestamp': '2025-09-30 22:33:12.232412', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:12.321582', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.2354971170425415, 'timestamp': '2025-09-30 22:33:12.333670', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.390678', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.10948974639177322, 'timestamp': '2025-09-30 22:33:12.392785', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:12.450142', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.08562485873699188, 'timestamp': '2025-09-30 22:33:12.452634', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.510097', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.08265630155801773, 'timestamp': '2025-09-30 22:33:12.512421', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:12.569362', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.13454152643680573, 'timestamp': '2025-09-30 22:33:12.575378', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:12.632127', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.08937699347734451, 'timestamp': '2025-09-30 22:33:12.637972', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.700961', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.1432119905948639, 'timestamp': '2025-09-30 22:33:12.703468', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.766185', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.08860279619693756, 'timestamp': '2025-09-30 22:33:12.768444', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.826444', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.060766205191612244, 'timestamp': '2025-09-30 22:33:12.832442', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:12.888391', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.12182343006134033, 'timestamp': '2025-09-30 22:33:12.890756', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:12.947905', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.164528951048851, 'timestamp': '2025-09-30 22:33:12.950537', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:13.021924', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.21510209143161774, 'timestamp': '2025-09-30 22:33:13.024224', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:13.085409', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.10981722176074982, 'timestamp': '2025-09-30 22:33:13.091375', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.146857', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.1324407160282135, 'timestamp': '2025-09-30 22:33:13.148937', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:13.210710', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.13226759433746338, 'timestamp': '2025-09-30 22:33:13.213016', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:13.270282', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.15022791922092438, 'timestamp': '2025-09-30 22:33:13.272824', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:13.328879', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.18348734080791473, 'timestamp': '2025-09-30 22:33:13.334728', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.403211', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.12422586232423782, 'timestamp': '2025-09-30 22:33:13.405370', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:13.461955', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.13025633990764618, 'timestamp': '2025-09-30 22:33:13.464271', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.520814', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.1388760209083557, 'timestamp': '2025-09-30 22:33:13.522885', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.579059', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.11145845055580139, 'timestamp': '2025-09-30 22:33:13.584839', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.640788', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11839484423398972, 'timestamp': '2025-09-30 22:33:13.643529', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:13.702418', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.07496973872184753, 'timestamp': '2025-09-30 22:33:13.704746', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:13.761726', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.19618645310401917, 'timestamp': '2025-09-30 22:33:13.765588', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:13.837822', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.061514291912317276, 'timestamp': '2025-09-30 22:33:13.845628', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:13.902006', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.09537594020366669, 'timestamp': '2025-09-30 22:33:13.904481', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:13.962429', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.11865244060754776, 'timestamp': '2025-09-30 22:33:13.964984', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:14.022887', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.1571454405784607, 'timestamp': '2025-09-30 22:33:14.025728', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:14.086078', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.05705241486430168, 'timestamp': '2025-09-30 22:33:14.092542', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:14.148844', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.16392762959003448, 'timestamp': '2025-09-30 22:33:14.151321', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.208078', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.1440456211566925, 'timestamp': '2025-09-30 22:33:14.210463', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:33:14.272475', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.08024829626083374, 'timestamp': '2025-09-30 22:33:14.275666', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:14.332601', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.15438510477542877, 'timestamp': '2025-09-30 22:33:14.338845', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:14.394517', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.13825969398021698, 'timestamp': '2025-09-30 22:33:14.396805', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:14.454092', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.04904301464557648, 'timestamp': '2025-09-30 22:33:14.458112', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.521498', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.06850618869066238, 'timestamp': '2025-09-30 22:33:14.524278', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:14.584161', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.17446644604206085, 'timestamp': '2025-09-30 22:33:14.592017', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.650979', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.1360810399055481, 'timestamp': '2025-09-30 22:33:14.653476', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.711200', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.09506900608539581, 'timestamp': '2025-09-30 22:33:14.713343', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.773391', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.07984204590320587, 'timestamp': '2025-09-30 22:33:14.776638', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.839279', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.13009484112262726, 'timestamp': '2025-09-30 22:33:14.844946', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.902841', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.09357152134180069, 'timestamp': '2025-09-30 22:33:14.908572', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:14.972435', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.12050531804561615, 'timestamp': '2025-09-30 22:33:14.975710', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.039911', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.06269384175539017, 'timestamp': '2025-09-30 22:33:15.053182', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.114255', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.13622790575027466, 'timestamp': '2025-09-30 22:33:15.119997', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.192078', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.14980106055736542, 'timestamp': '2025-09-30 22:33:15.194925', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:15.286044', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.08803104609251022, 'timestamp': '2025-09-30 22:33:15.289468', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.359334', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.075141541659832, 'timestamp': '2025-09-30 22:33:15.363688', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:15.420962', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.1251116394996643, 'timestamp': '2025-09-30 22:33:15.428759', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.486396', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.11969190090894699, 'timestamp': '2025-09-30 22:33:15.503898', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.574129', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.12893736362457275, 'timestamp': '2025-09-30 22:33:15.591551', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:15.649310', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.15898388624191284, 'timestamp': '2025-09-30 22:33:15.654805', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.713824', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.1982407420873642, 'timestamp': '2025-09-30 22:33:15.732779', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:15.794875', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.09368088841438293, 'timestamp': '2025-09-30 22:33:15.797909', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:15.855647', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.1479307860136032, 'timestamp': '2025-09-30 22:33:15.862429', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.922049', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.14943043887615204, 'timestamp': '2025-09-30 22:33:15.927320', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:15.988153', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.08944695442914963, 'timestamp': '2025-09-30 22:33:15.996113', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-09-30 22:33:16.499246', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:16.559326', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.13941097259521484, 'timestamp': '2025-09-30 22:33:16.561938', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:16.619520', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.08901836723089218, 'timestamp': '2025-09-30 22:33:16.621557', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:16.680350', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.1409919410943985, 'timestamp': '2025-09-30 22:33:16.683203', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:16.739711', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.07668934762477875, 'timestamp': '2025-09-30 22:33:16.756131', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:16.812715', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.13070887327194214, 'timestamp': '2025-09-30 22:33:16.814920', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:16.872145', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.10256267338991165, 'timestamp': '2025-09-30 22:33:16.874468', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:16.933992', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.06940547376871109, 'timestamp': '2025-09-30 22:33:16.936179', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:16.992816', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.11009827256202698, 'timestamp': '2025-09-30 22:33:16.998807', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:17.054938', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.10292931646108627, 'timestamp': '2025-09-30 22:33:17.057445', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:17.114821', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.09895876795053482, 'timestamp': '2025-09-30 22:33:17.126542', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:17.193861', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.21468320488929749, 'timestamp': '2025-09-30 22:33:17.197099', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:17.255515', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.06370964646339417, 'timestamp': '2025-09-30 22:33:17.262756', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:17.320160', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.07251614332199097, 'timestamp': '2025-09-30 22:33:17.322708', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:17.379077', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.0678897425532341, 'timestamp': '2025-09-30 22:33:17.381202', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:33:31.899210', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 9474.44919967981, 'timestamp': '2025-09-30 22:33:31.903526', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:31.964611', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.11868505924940109, 'timestamp': '2025-09-30 22:33:31.969268', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.037270', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.11810064315795898, 'timestamp': '2025-09-30 22:33:32.050326', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.107356', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.08152257651090622, 'timestamp': '2025-09-30 22:33:32.113338', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:32.171438', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.11678962409496307, 'timestamp': '2025-09-30 22:33:32.174874', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:32.255469', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.25888118147850037, 'timestamp': '2025-09-30 22:33:32.266680', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.331961', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.12860798835754395, 'timestamp': '2025-09-30 22:33:32.338504', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:32.410854', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.15362043678760529, 'timestamp': '2025-09-30 22:33:32.416039', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:32.480793', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.16543379426002502, 'timestamp': '2025-09-30 22:33:32.483769', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:32.546249', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.046615999191999435, 'timestamp': '2025-09-30 22:33:32.548526', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.606670', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.0701521560549736, 'timestamp': '2025-09-30 22:33:32.621166', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:32.683292', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.17239898443222046, 'timestamp': '2025-09-30 22:33:32.693752', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.752334', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.036875851452350616, 'timestamp': '2025-09-30 22:33:32.756150', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:32.814536', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.1421114057302475, 'timestamp': '2025-09-30 22:33:32.822276', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:32.885306', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.10057941824197769, 'timestamp': '2025-09-30 22:33:32.892925', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:32.966802', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.11030636727809906, 'timestamp': '2025-09-30 22:33:32.969248', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:33.029726', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.09548201411962509, 'timestamp': '2025-09-30 22:33:33.033007', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:33.091562', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.14732113480567932, 'timestamp': '2025-09-30 22:33:33.095384', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.154142', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.08059094101190567, 'timestamp': '2025-09-30 22:33:33.163760', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:33.222051', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.0815557986497879, 'timestamp': '2025-09-30 22:33:33.225577', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.301277', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.08994453400373459, 'timestamp': '2025-09-30 22:33:33.305113', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.363547', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.09906773269176483, 'timestamp': '2025-09-30 22:33:33.367467', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:33.427066', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.13544927537441254, 'timestamp': '2025-09-30 22:33:33.434013', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:33.492833', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.06318464130163193, 'timestamp': '2025-09-30 22:33:33.502314', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.561397', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.0855872705578804, 'timestamp': '2025-09-30 22:33:33.565340', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.625076', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.12770237028598785, 'timestamp': '2025-09-30 22:33:33.636366', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.704762', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.12485294044017792, 'timestamp': '2025-09-30 22:33:33.711899', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:33.773307', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.19066058099269867, 'timestamp': '2025-09-30 22:33:33.776832', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:33.843273', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.07560315728187561, 'timestamp': '2025-09-30 22:33:33.854001', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:33.912156', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.13066600263118744, 'timestamp': '2025-09-30 22:33:33.926350', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:33.991140', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.10213121771812439, 'timestamp': '2025-09-30 22:33:33.999383', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:34.064356', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.1696368008852005, 'timestamp': '2025-09-30 22:33:34.067844', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.134313', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.17750681936740875, 'timestamp': '2025-09-30 22:33:34.137739', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:34.198358', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.2651374340057373, 'timestamp': '2025-09-30 22:33:34.204490', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:34.263510', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.17238490283489227, 'timestamp': '2025-09-30 22:33:34.273091', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.332793', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.050761520862579346, 'timestamp': '2025-09-30 22:33:34.338735', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.412729', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.12331556528806686, 'timestamp': '2025-09-30 22:33:34.418076', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:34.501239', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.12491622567176819, 'timestamp': '2025-09-30 22:33:34.505864', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:34.564757', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.14002084732055664, 'timestamp': '2025-09-30 22:33:34.572862', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.641756', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.12158084660768509, 'timestamp': '2025-09-30 22:33:34.645807', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.711325', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.11662203818559647, 'timestamp': '2025-09-30 22:33:34.714804', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.772575', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.17337146401405334, 'timestamp': '2025-09-30 22:33:34.777119', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.845420', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.16538558900356293, 'timestamp': '2025-09-30 22:33:34.852874', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:34.912687', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.110074482858181, 'timestamp': '2025-09-30 22:33:34.919133', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:34.980157', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.09229600429534912, 'timestamp': '2025-09-30 22:33:34.986385', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:35.048157', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.10635935515165329, 'timestamp': '2025-09-30 22:33:35.055317', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:35.116353', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.07636146992444992, 'timestamp': '2025-09-30 22:33:35.128066', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:35.186759', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.12605859339237213, 'timestamp': '2025-09-30 22:33:35.201749', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:35.272542', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.06548406183719635, 'timestamp': '2025-09-30 22:33:35.278387', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:35.348806', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.12017014622688293, 'timestamp': '2025-09-30 22:33:35.352682', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:35.411277', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.12529343366622925, 'timestamp': '2025-09-30 22:33:35.427347', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:35.524596', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.1165519654750824, 'timestamp': '2025-09-30 22:33:35.531671', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:35.590686', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.11310151219367981, 'timestamp': '2025-09-30 22:33:35.603747', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:35.701097', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.20661059021949768, 'timestamp': '2025-09-30 22:33:35.712750', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:35.772738', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.12624527513980865, 'timestamp': '2025-09-30 22:33:35.787981', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:35.846761', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.15198560059070587, 'timestamp': '2025-09-30 22:33:35.855248', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:35.923201', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.09938600659370422, 'timestamp': '2025-09-30 22:33:35.939456', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:36.028376', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.12207917869091034, 'timestamp': '2025-09-30 22:33:36.045369', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:36.109634', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.14416995644569397, 'timestamp': '2025-09-30 22:33:36.127560', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:36.186888', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.13351210951805115, 'timestamp': '2025-09-30 22:33:36.200519', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:36.259957', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.10968612879514694, 'timestamp': '2025-09-30 22:33:36.269114', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:36.345130', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.08159831166267395, 'timestamp': '2025-09-30 22:33:36.353570', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:36.426921', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.16259406507015228, 'timestamp': '2025-09-30 22:33:36.467862', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:36.535426', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.15705080330371857, 'timestamp': '2025-09-30 22:33:36.546196', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:36.629592', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.09469394385814667, 'timestamp': '2025-09-30 22:33:36.634305', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:36.695940', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.09134399890899658, 'timestamp': '2025-09-30 22:33:36.707288', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:36.766292', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.1704331636428833, 'timestamp': '2025-09-30 22:33:36.788835', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:36.855119', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.08563899993896484, 'timestamp': '2025-09-30 22:33:36.865570', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:36.923642', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.15084245800971985, 'timestamp': '2025-09-30 22:33:36.928823', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.001088', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.1631246656179428, 'timestamp': '2025-09-30 22:33:37.005724', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.072938', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.08904573321342468, 'timestamp': '2025-09-30 22:33:37.087147', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:37.145570', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.1592542976140976, 'timestamp': '2025-09-30 22:33:37.150275', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.214719', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.15351955592632294, 'timestamp': '2025-09-30 22:33:37.228144', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:37.285751', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.06637666374444962, 'timestamp': '2025-09-30 22:33:37.289230', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:37.349282', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.16453313827514648, 'timestamp': '2025-09-30 22:33:37.358055', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.417472', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.11228340119123459, 'timestamp': '2025-09-30 22:33:37.421018', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.481366', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.13429132103919983, 'timestamp': '2025-09-30 22:33:37.491232', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:37.551548', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.09232518821954727, 'timestamp': '2025-09-30 22:33:37.556286', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:37.615940', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.22483515739440918, 'timestamp': '2025-09-30 22:33:37.631916', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:37.689863', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.10804654657840729, 'timestamp': '2025-09-30 22:33:37.702258', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:37.761137', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.08507154136896133, 'timestamp': '2025-09-30 22:33:37.774872', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:37.851355', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.15800094604492188, 'timestamp': '2025-09-30 22:33:37.855965', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:37.914244', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.09908560663461685, 'timestamp': '2025-09-30 22:33:37.921256', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:37.979725', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.1192852184176445, 'timestamp': '2025-09-30 22:33:37.985577', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.052736', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.19172006845474243, 'timestamp': '2025-09-30 22:33:38.057218', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.118320', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.18437276780605316, 'timestamp': '2025-09-30 22:33:38.122768', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.208109', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.08146295696496964, 'timestamp': '2025-09-30 22:33:38.222430', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.291127', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.087307870388031, 'timestamp': '2025-09-30 22:33:38.304329', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.364251', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.21085388958454132, 'timestamp': '2025-09-30 22:33:38.375965', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.439435', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.13909947872161865, 'timestamp': '2025-09-30 22:33:38.447892', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.521231', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.07410670816898346, 'timestamp': '2025-09-30 22:33:38.530448', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.609125', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.1684286892414093, 'timestamp': '2025-09-30 22:33:38.615270', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.674079', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.10479707270860672, 'timestamp': '2025-09-30 22:33:38.678005', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.747242', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.14538240432739258, 'timestamp': '2025-09-30 22:33:38.761187', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:38.819674', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.11802902072668076, 'timestamp': '2025-09-30 22:33:38.828577', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:38.886394', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.1137840524315834, 'timestamp': '2025-09-30 22:33:38.900625', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:38.961829', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.13245294988155365, 'timestamp': '2025-09-30 22:33:38.967417', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.039496', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.13046371936798096, 'timestamp': '2025-09-30 22:33:39.044988', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:39.104120', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.11885172873735428, 'timestamp': '2025-09-30 22:33:39.112821', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:39.171326', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.19981351494789124, 'timestamp': '2025-09-30 22:33:39.184608', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:39.242971', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.07945311069488525, 'timestamp': '2025-09-30 22:33:39.256178', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:39.315558', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.15623976290225983, 'timestamp': '2025-09-30 22:33:39.330761', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.397925', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.19912269711494446, 'timestamp': '2025-09-30 22:33:39.409542', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:39.468132', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.12810836732387543, 'timestamp': '2025-09-30 22:33:39.473740', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:39.533959', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.22988486289978027, 'timestamp': '2025-09-30 22:33:39.538462', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:39.596830', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.1368425488471985, 'timestamp': '2025-09-30 22:33:39.601689', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.669861', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.07663310319185257, 'timestamp': '2025-09-30 22:33:39.679663', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.748577', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.08806493878364563, 'timestamp': '2025-09-30 22:33:39.757292', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.815557', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.11675973981618881, 'timestamp': '2025-09-30 22:33:39.819369', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:39.877975', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.08188246190547943, 'timestamp': '2025-09-30 22:33:39.881768', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:39.939461', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.11791533976793289, 'timestamp': '2025-09-30 22:33:39.947727', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.013835', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.08653461933135986, 'timestamp': '2025-09-30 22:33:40.027031', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.094965', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.07515688985586166, 'timestamp': '2025-09-30 22:33:40.101348', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.159423', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.13818980753421783, 'timestamp': '2025-09-30 22:33:40.162793', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:40.221588', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.11970159411430359, 'timestamp': '2025-09-30 22:33:40.228873', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.295230', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.13052396476268768, 'timestamp': '2025-09-30 22:33:40.300296', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.366038', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.08112971484661102, 'timestamp': '2025-09-30 22:33:40.373348', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:40.431693', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.1323312520980835, 'timestamp': '2025-09-30 22:33:40.435160', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.494115', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.0909755676984787, 'timestamp': '2025-09-30 22:33:40.501977', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.560611', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.06680352985858917, 'timestamp': '2025-09-30 22:33:40.565444', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.636264', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.17161405086517334, 'timestamp': '2025-09-30 22:33:40.640230', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.698218', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.11280730366706848, 'timestamp': '2025-09-30 22:33:40.702629', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.760201', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.09977598488330841, 'timestamp': '2025-09-30 22:33:40.766873', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.840609', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.12934206426143646, 'timestamp': '2025-09-30 22:33:40.849734', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:40.908131', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.17742839455604553, 'timestamp': '2025-09-30 22:33:40.919188', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:40.976294', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.05749534070491791, 'timestamp': '2025-09-30 22:33:40.988900', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:41.057309', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.14341188967227936, 'timestamp': '2025-09-30 22:33:41.063708', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:41.121246', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.09013230353593826, 'timestamp': '2025-09-30 22:33:41.124256', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:41.182599', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.09409499913454056, 'timestamp': '2025-09-30 22:33:41.186398', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:41.244139', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.1442043036222458, 'timestamp': '2025-09-30 22:33:41.247065', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:41.304141', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.12557590007781982, 'timestamp': '2025-09-30 22:33:41.311525', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:41.367517', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.09572529047727585, 'timestamp': '2025-09-30 22:33:41.370895', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:41.440495', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.08945713192224503, 'timestamp': '2025-09-30 22:33:41.443766', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:41.500684', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.13742244243621826, 'timestamp': '2025-09-30 22:33:41.503871', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:41.561511', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.09963595122098923, 'timestamp': '2025-09-30 22:33:41.568643', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:41.636040', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.06905990839004517, 'timestamp': '2025-09-30 22:33:41.639478', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:41.707430', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.16610725224018097, 'timestamp': '2025-09-30 22:33:41.710555', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:41.768702', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.06957284361124039, 'timestamp': '2025-09-30 22:33:41.771482', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:41.828670', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.21626393496990204, 'timestamp': '2025-09-30 22:33:41.836537', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:41.895056', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.15143857896327972, 'timestamp': '2025-09-30 22:33:41.897977', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:41.955630', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.17660273611545563, 'timestamp': '2025-09-30 22:33:41.958663', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:42.015380', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.12694212794303894, 'timestamp': '2025-09-30 22:33:42.018370', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.075636', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.06568899005651474, 'timestamp': '2025-09-30 22:33:42.082998', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.139362', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.1392013132572174, 'timestamp': '2025-09-30 22:33:42.143730', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:42.201616', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.04768295958638191, 'timestamp': '2025-09-30 22:33:42.206374', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.264313', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.11090786755084991, 'timestamp': '2025-09-30 22:33:42.273410', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:42.331767', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.08514383435249329, 'timestamp': '2025-09-30 22:33:42.338543', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.396558', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.18105299770832062, 'timestamp': '2025-09-30 22:33:42.400414', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:42.464203', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.11767423152923584, 'timestamp': '2025-09-30 22:33:42.467513', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.537395', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.12043184041976929, 'timestamp': '2025-09-30 22:33:42.541651', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:42.599854', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.1464499980211258, 'timestamp': '2025-09-30 22:33:42.606219', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:42.666321', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.10152270644903183, 'timestamp': '2025-09-30 22:33:42.673457', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:42.730747', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.2436559647321701, 'timestamp': '2025-09-30 22:33:42.733068', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:42.791363', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.09337056428194046, 'timestamp': '2025-09-30 22:33:42.803117', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:42.862106', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.11421778798103333, 'timestamp': '2025-09-30 22:33:42.869038', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:42.926837', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.15928509831428528, 'timestamp': '2025-09-30 22:33:42.929807', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:42.987568', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.1591598391532898, 'timestamp': '2025-09-30 22:33:42.990853', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.049267', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.14970290660858154, 'timestamp': '2025-09-30 22:33:43.056924', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:43.114953', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.1291920244693756, 'timestamp': '2025-09-30 22:33:43.127519', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.188919', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.1596970111131668, 'timestamp': '2025-09-30 22:33:43.193710', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:43.251100', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.11394597589969635, 'timestamp': '2025-09-30 22:33:43.260035', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.320788', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.05271513760089874, 'timestamp': '2025-09-30 22:33:43.326559', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:43.400464', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.12594540417194366, 'timestamp': '2025-09-30 22:33:43.408455', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.472555', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.1145019680261612, 'timestamp': '2025-09-30 22:33:43.476800', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:43.534251', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.1835608184337616, 'timestamp': '2025-09-30 22:33:43.537410', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:43.595268', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.23272807896137238, 'timestamp': '2025-09-30 22:33:43.597958', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:43.687952', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.11332450807094574, 'timestamp': '2025-09-30 22:33:43.694544', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.752389', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.10541704297065735, 'timestamp': '2025-09-30 22:33:43.755005', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:43.811801', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.11016283929347992, 'timestamp': '2025-09-30 22:33:43.814530', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.871804', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.0879833772778511, 'timestamp': '2025-09-30 22:33:43.878040', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:43.934810', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.1010524332523346, 'timestamp': '2025-09-30 22:33:43.940955', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.002821', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.05107893794775009, 'timestamp': '2025-09-30 22:33:44.012278', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.074985', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.11465591937303543, 'timestamp': '2025-09-30 22:33:44.080984', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.140543', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.15324917435646057, 'timestamp': '2025-09-30 22:33:44.143131', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.209727', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.2178555727005005, 'timestamp': '2025-09-30 22:33:44.220921', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.278528', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.1119166687130928, 'timestamp': '2025-09-30 22:33:44.281076', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.339743', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.11375496536493301, 'timestamp': '2025-09-30 22:33:44.342704', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:44.403232', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.08450599759817123, 'timestamp': '2025-09-30 22:33:44.408983', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.478609', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.20962846279144287, 'timestamp': '2025-09-30 22:33:44.485108', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.542944', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.09960203617811203, 'timestamp': '2025-09-30 22:33:44.546196', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.603386', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.1373550295829773, 'timestamp': '2025-09-30 22:33:44.605309', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:44.663128', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.1599210798740387, 'timestamp': '2025-09-30 22:33:44.665808', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.724349', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.10296424478292465, 'timestamp': '2025-09-30 22:33:44.732024', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.790472', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.11085545271635056, 'timestamp': '2025-09-30 22:33:44.794528', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:44.852104', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.07119140028953552, 'timestamp': '2025-09-30 22:33:44.855001', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:44.913269', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.1704896092414856, 'timestamp': '2025-09-30 22:33:44.921142', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:44.979435', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.1491861641407013, 'timestamp': '2025-09-30 22:33:44.990566', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:45.047588', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.13259544968605042, 'timestamp': '2025-09-30 22:33:45.054789', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:45.112464', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.20535102486610413, 'timestamp': '2025-09-30 22:33:45.114851', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:45.180656', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.09766000509262085, 'timestamp': '2025-09-30 22:33:45.188725', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:45.250981', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.1189337894320488, 'timestamp': '2025-09-30 22:33:45.260898', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:45.319239', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.041332799941301346, 'timestamp': '2025-09-30 22:33:45.322067', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:45.385093', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.06871497631072998, 'timestamp': '2025-09-30 22:33:45.394214', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:45.470025', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.12818212807178497, 'timestamp': '2025-09-30 22:33:45.477747', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:45.539634', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.08260423690080643, 'timestamp': '2025-09-30 22:33:45.546244', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:45.603642', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.09742321819067001, 'timestamp': '2025-09-30 22:33:45.609546', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:45.667634', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.16502682864665985, 'timestamp': '2025-09-30 22:33:45.670955', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:45.736898', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.10572872310876846, 'timestamp': '2025-09-30 22:33:45.743534', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:45.800112', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.08866868913173676, 'timestamp': '2025-09-30 22:33:45.806240', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:45.862545', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.0644284337759018, 'timestamp': '2025-09-30 22:33:45.864791', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:45.920943', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.21147868037223816, 'timestamp': '2025-09-30 22:33:45.924209', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:45.981412', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.17096713185310364, 'timestamp': '2025-09-30 22:33:45.984428', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:33:46.041006', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.0950646847486496, 'timestamp': '2025-09-30 22:33:46.047693', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:46.105721', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.10321583598852158, 'timestamp': '2025-09-30 22:33:46.108556', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:46.165905', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.10433565825223923, 'timestamp': '2025-09-30 22:33:46.168770', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:33:46.228439', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.1560383290052414, 'timestamp': '2025-09-30 22:33:46.238200', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.296351', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.10677961260080338, 'timestamp': '2025-09-30 22:33:46.303472', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.360650', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.04452148824930191, 'timestamp': '2025-09-30 22:33:46.363059', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.420237', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.048777855932712555, 'timestamp': '2025-09-30 22:33:46.423941', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:46.481785', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.1257314532995224, 'timestamp': '2025-09-30 22:33:46.484260', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:46.541942', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.13468459248542786, 'timestamp': '2025-09-30 22:33:46.552316', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.609842', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.11089540272951126, 'timestamp': '2025-09-30 22:33:46.612295', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:46.669880', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.16266676783561707, 'timestamp': '2025-09-30 22:33:46.673204', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.738954', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.12064206600189209, 'timestamp': '2025-09-30 22:33:46.749954', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.807292', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.14535237848758698, 'timestamp': '2025-09-30 22:33:46.819471', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:46.876349', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.1229911521077156, 'timestamp': '2025-09-30 22:33:46.879850', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:46.937688', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.13965408504009247, 'timestamp': '2025-09-30 22:33:46.942740', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:47.003188', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.13996294140815735, 'timestamp': '2025-09-30 22:33:47.010757', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:47.082850', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.10611356794834137, 'timestamp': '2025-09-30 22:33:47.089445', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.165326', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.08331326395273209, 'timestamp': '2025-09-30 22:33:47.171910', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:47.232743', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.15564514696598053, 'timestamp': '2025-09-30 22:33:47.239150', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:47.300203', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.10896343737840652, 'timestamp': '2025-09-30 22:33:47.305663', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:47.367035', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.23223534226417542, 'timestamp': '2025-09-30 22:33:47.376971', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.436928', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.08438645303249359, 'timestamp': '2025-09-30 22:33:47.439981', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:47.500439', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.16959123313426971, 'timestamp': '2025-09-30 22:33:47.504221', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.562137', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.09069667011499405, 'timestamp': '2025-09-30 22:33:47.565286', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:47.630956', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.11142230778932571, 'timestamp': '2025-09-30 22:33:47.638325', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.712871', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.13735610246658325, 'timestamp': '2025-09-30 22:33:47.716766', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:47.778224', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.07636073231697083, 'timestamp': '2025-09-30 22:33:47.782101', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.840645', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.2239702194929123, 'timestamp': '2025-09-30 22:33:47.848855', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:47.924675', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.12618538737297058, 'timestamp': '2025-09-30 22:33:47.931687', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:47.992099', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.11596924811601639, 'timestamp': '2025-09-30 22:33:47.994932', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:48.056331', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.1539696753025055, 'timestamp': '2025-09-30 22:33:48.061331', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:48.122321', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.13185425102710724, 'timestamp': '2025-09-30 22:33:48.129607', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:48.194507', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.17049899697303772, 'timestamp': '2025-09-30 22:33:48.205941', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:48.263895', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.13400006294250488, 'timestamp': '2025-09-30 22:33:48.274324', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:48.332043', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.12701842188835144, 'timestamp': '2025-09-30 22:33:48.334712', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:48.411324', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.18422389030456543, 'timestamp': '2025-09-30 22:33:48.420353', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:48.482752', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.2052944153547287, 'timestamp': '2025-09-30 22:33:48.493704', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:48.553373', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.0674242377281189, 'timestamp': '2025-09-30 22:33:48.559958', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:48.625223', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.2608368694782257, 'timestamp': '2025-09-30 22:33:48.627775', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:48.690700', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.17781661450862885, 'timestamp': '2025-09-30 22:33:48.696456', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:48.753781', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.06576042622327805, 'timestamp': '2025-09-30 22:33:48.759743', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:48.822792', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.08457956463098526, 'timestamp': '2025-09-30 22:33:48.831462', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:48.895813', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.13739100098609924, 'timestamp': '2025-09-30 22:33:48.898158', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:48.958804', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.12978072464466095, 'timestamp': '2025-09-30 22:33:48.967140', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.033578', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.19908678531646729, 'timestamp': '2025-09-30 22:33:49.043587', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.105029', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.14207416772842407, 'timestamp': '2025-09-30 22:33:49.108568', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.167948', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.08855921775102615, 'timestamp': '2025-09-30 22:33:49.178466', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:49.237620', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.07500049471855164, 'timestamp': '2025-09-30 22:33:49.244376', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:49.306758', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.10858122259378433, 'timestamp': '2025-09-30 22:33:49.313419', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:49.369587', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.11965227872133255, 'timestamp': '2025-09-30 22:33:49.372045', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.428541', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.09576842933893204, 'timestamp': '2025-09-30 22:33:49.437279', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.499689', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.19294695556163788, 'timestamp': '2025-09-30 22:33:49.502114', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:33:49.562706', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.05265074595808983, 'timestamp': '2025-09-30 22:33:49.569827', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:49.630153', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.07009109109640121, 'timestamp': '2025-09-30 22:33:49.632604', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:49.696158', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.12269491702318192, 'timestamp': '2025-09-30 22:33:49.699843', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:49.757095', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.12300849705934525, 'timestamp': '2025-09-30 22:33:49.760513', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:49.818066', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.04558907449245453, 'timestamp': '2025-09-30 22:33:49.830878', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:49.889024', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.11494473367929459, 'timestamp': '2025-09-30 22:33:49.891493', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:49.955050', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.1274750828742981, 'timestamp': '2025-09-30 22:33:49.963069', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.025306', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.19828113913536072, 'timestamp': '2025-09-30 22:33:50.028181', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:50.089069', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.1819361001253128, 'timestamp': '2025-09-30 22:33:50.098110', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.155468', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.19671911001205444, 'timestamp': '2025-09-30 22:33:50.161985', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:50.229381', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.29977670311927795, 'timestamp': '2025-09-30 22:33:50.231816', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.287797', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.20465999841690063, 'timestamp': '2025-09-30 22:33:50.290200', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:50.346397', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.1173565611243248, 'timestamp': '2025-09-30 22:33:50.352585', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.410606', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.17186322808265686, 'timestamp': '2025-09-30 22:33:50.417616', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:50.484219', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.20731617510318756, 'timestamp': '2025-09-30 22:33:50.486997', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:50.545166', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.10226743668317795, 'timestamp': '2025-09-30 22:33:50.547786', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.607637', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.09248548746109009, 'timestamp': '2025-09-30 22:33:50.616017', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:50.672086', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.15868094563484192, 'timestamp': '2025-09-30 22:33:50.675272', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:50.737120', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.08396931737661362, 'timestamp': '2025-09-30 22:33:50.745178', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:50.801303', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.06892088800668716, 'timestamp': '2025-09-30 22:33:50.804091', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.861222', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.09855406731367111, 'timestamp': '2025-09-30 22:33:50.870439', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:50.933298', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.11256521940231323, 'timestamp': '2025-09-30 22:33:50.936423', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:50.995764', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.10224985331296921, 'timestamp': '2025-09-30 22:33:50.998800', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.060119', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.12230636179447174, 'timestamp': '2025-09-30 22:33:51.067974', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.127465', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.09059564024209976, 'timestamp': '2025-09-30 22:33:51.136768', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:51.196644', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.06365185976028442, 'timestamp': '2025-09-30 22:33:51.200307', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:51.256948', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.1405784636735916, 'timestamp': '2025-09-30 22:33:51.277436', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.334136', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.1219225749373436, 'timestamp': '2025-09-30 22:33:51.337275', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.394118', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.15808340907096863, 'timestamp': '2025-09-30 22:33:51.400127', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:51.458951', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.10567479580640793, 'timestamp': '2025-09-30 22:33:51.461393', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:51.521960', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.11892940104007721, 'timestamp': '2025-09-30 22:33:51.529570', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:51.585773', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.14586272835731506, 'timestamp': '2025-09-30 22:33:51.588981', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.649768', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.11800894141197205, 'timestamp': '2025-09-30 22:33:51.659029', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:51.716495', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.10685063898563385, 'timestamp': '2025-09-30 22:33:51.719457', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:51.781495', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.13840577006340027, 'timestamp': '2025-09-30 22:33:51.785112', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:51.847836', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.1715821623802185, 'timestamp': '2025-09-30 22:33:51.854206', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:51.927238', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.10895296186208725, 'timestamp': '2025-09-30 22:33:51.933797', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:51.990202', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.17430691421031952, 'timestamp': '2025-09-30 22:33:51.998160', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.059207', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.0797940120100975, 'timestamp': '2025-09-30 22:33:52.069960', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.127683', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.10260702669620514, 'timestamp': '2025-09-30 22:33:52.130636', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:52.187419', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.09817617386579514, 'timestamp': '2025-09-30 22:33:52.199783', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:52.259768', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.09888841211795807, 'timestamp': '2025-09-30 22:33:52.262112', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.321353', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.14361856877803802, 'timestamp': '2025-09-30 22:33:52.326406', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:52.383767', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.08849292993545532, 'timestamp': '2025-09-30 22:33:52.387545', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:52.445715', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.1599602997303009, 'timestamp': '2025-09-30 22:33:52.451763', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.514236', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.10232957452535629, 'timestamp': '2025-09-30 22:33:52.517141', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:52.591626', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.1216876357793808, 'timestamp': '2025-09-30 22:33:52.605140', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:52.670204', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.19102047383785248, 'timestamp': '2025-09-30 22:33:52.674385', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.745592', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.13567812740802765, 'timestamp': '2025-09-30 22:33:52.752814', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:52.824674', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.11497630923986435, 'timestamp': '2025-09-30 22:33:52.832464', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:52.890049', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.14814871549606323, 'timestamp': '2025-09-30 22:33:52.899307', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:52.963281', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.1078178882598877, 'timestamp': '2025-09-30 22:33:52.966888', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:53.047360', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.21101152896881104, 'timestamp': '2025-09-30 22:33:53.054541', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:53.115777', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.07767153531312943, 'timestamp': '2025-09-30 22:33:53.118774', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:53.175654', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.08604732155799866, 'timestamp': '2025-09-30 22:33:53.179199', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:53.240026', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.11741605401039124, 'timestamp': '2025-09-30 22:33:53.246197', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:53.303393', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.16846440732479095, 'timestamp': '2025-09-30 22:33:53.309803', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:53.367955', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.10195665806531906, 'timestamp': '2025-09-30 22:33:53.371012', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:53.429284', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.08618107438087463, 'timestamp': '2025-09-30 22:33:53.432604', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:53.489629', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.08906777203083038, 'timestamp': '2025-09-30 22:33:53.504130', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:53.566770', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.10859184712171555, 'timestamp': '2025-09-30 22:33:53.573656', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:53.630901', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.11977133899927139, 'timestamp': '2025-09-30 22:33:53.635284', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:53.692940', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.16024120151996613, 'timestamp': '2025-09-30 22:33:53.695832', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:53.752759', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.11994725465774536, 'timestamp': '2025-09-30 22:33:53.755756', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:53.817292', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.06389294564723969, 'timestamp': '2025-09-30 22:33:53.824881', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:53.880889', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.130387082695961, 'timestamp': '2025-09-30 22:33:53.884540', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:53.943554', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.07814478874206543, 'timestamp': '2025-09-30 22:33:53.948453', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:54.007510', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.11266619712114334, 'timestamp': '2025-09-30 22:33:54.014367', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:54.075065', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.14118672907352448, 'timestamp': '2025-09-30 22:33:54.082658', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:54.151663', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.14390435814857483, 'timestamp': '2025-09-30 22:33:54.153755', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:54.218707', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.08611034601926804, 'timestamp': '2025-09-30 22:33:54.224012', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:54.287085', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.12180450558662415, 'timestamp': '2025-09-30 22:33:54.290027', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:54.350132', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.14177334308624268, 'timestamp': '2025-09-30 22:33:54.358252', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:54.414900', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.07524328678846359, 'timestamp': '2025-09-30 22:33:54.417588', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:54.475583', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.12412367016077042, 'timestamp': '2025-09-30 22:33:54.477796', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:54.534841', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.0892285704612732, 'timestamp': '2025-09-30 22:33:54.539634', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:54.618984', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.07891776412725449, 'timestamp': '2025-09-30 22:33:54.624984', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:54.682241', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.12473518401384354, 'timestamp': '2025-09-30 22:33:54.688495', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:54.746699', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.18329453468322754, 'timestamp': '2025-09-30 22:33:54.749926', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:54.806950', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.19492577016353607, 'timestamp': '2025-09-30 22:33:54.811666', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:54.868441', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.18916545808315277, 'timestamp': '2025-09-30 22:33:54.877867', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:54.936932', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.13042697310447693, 'timestamp': '2025-09-30 22:33:54.942751', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:55.000581', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.1057959720492363, 'timestamp': '2025-09-30 22:33:55.003682', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.060618', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.10818135738372803, 'timestamp': '2025-09-30 22:33:55.063481', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.120631', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.17821939289569855, 'timestamp': '2025-09-30 22:33:55.127885', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.184917', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.11178005486726761, 'timestamp': '2025-09-30 22:33:55.188982', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.251137', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.14295357465744019, 'timestamp': '2025-09-30 22:33:55.254684', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.312395', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.10883122682571411, 'timestamp': '2025-09-30 22:33:55.317803', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:55.378683', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.17959055304527283, 'timestamp': '2025-09-30 22:33:55.384928', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.441145', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.1182316392660141, 'timestamp': '2025-09-30 22:33:55.444367', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.502540', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.13192541897296906, 'timestamp': '2025-09-30 22:33:55.509293', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.573086', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.16032929718494415, 'timestamp': '2025-09-30 22:33:55.577405', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.637148', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.08009213209152222, 'timestamp': '2025-09-30 22:33:55.644102', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.702522', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.1593896448612213, 'timestamp': '2025-09-30 22:33:55.706449', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.764162', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.15800601243972778, 'timestamp': '2025-09-30 22:33:55.769211', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:55.829468', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.1956908255815506, 'timestamp': '2025-09-30 22:33:55.834629', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:55.894446', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.12656010687351227, 'timestamp': '2025-09-30 22:33:55.900784', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:55.958105', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.18953734636306763, 'timestamp': '2025-09-30 22:33:55.961648', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.018950', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.06656158715486526, 'timestamp': '2025-09-30 22:33:56.030559', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:56.088456', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.1640264391899109, 'timestamp': '2025-09-30 22:33:56.091254', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:56.147427', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.06049634516239166, 'timestamp': '2025-09-30 22:33:56.153496', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.224900', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.07224898040294647, 'timestamp': '2025-09-30 22:33:56.227823', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.284733', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.10444556176662445, 'timestamp': '2025-09-30 22:33:56.296313', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.360730', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.06868702173233032, 'timestamp': '2025-09-30 22:33:56.363091', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:56.423543', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.06207265332341194, 'timestamp': '2025-09-30 22:33:56.429709', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:33:56.487793', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.0930497944355011, 'timestamp': '2025-09-30 22:33:56.493025', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:56.553046', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.19341018795967102, 'timestamp': '2025-09-30 22:33:56.558936', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:56.630720', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.09729637950658798, 'timestamp': '2025-09-30 22:33:56.632994', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.689517', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.1281440705060959, 'timestamp': '2025-09-30 22:33:56.696155', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:56.752650', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.08021534979343414, 'timestamp': '2025-09-30 22:33:56.756633', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.815337', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.046927839517593384, 'timestamp': '2025-09-30 22:33:56.819184', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.882148', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.1530994027853012, 'timestamp': '2025-09-30 22:33:56.887900', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:56.947456', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.0922636017203331, 'timestamp': '2025-09-30 22:33:56.954231', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:57.016353', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.09688451886177063, 'timestamp': '2025-09-30 22:33:57.019537', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:57.076685', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.1887086033821106, 'timestamp': '2025-09-30 22:33:57.082780', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.140712', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.10683640837669373, 'timestamp': '2025-09-30 22:33:57.143284', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:33:57.205754', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.10925759375095367, 'timestamp': '2025-09-30 22:33:57.216099', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.275675', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.05559782683849335, 'timestamp': '2025-09-30 22:33:57.284937', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.350877', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.07316144555807114, 'timestamp': '2025-09-30 22:33:57.363394', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.424524', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.09137360751628876, 'timestamp': '2025-09-30 22:33:57.426917', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:57.488804', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.15813806653022766, 'timestamp': '2025-09-30 22:33:57.498296', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:57.556841', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.09003859013319016, 'timestamp': '2025-09-30 22:33:57.559637', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:57.617319', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.1358548402786255, 'timestamp': '2025-09-30 22:33:57.621236', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:57.679138', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.10151125490665436, 'timestamp': '2025-09-30 22:33:57.682640', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:57.744117', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.15337909758090973, 'timestamp': '2025-09-30 22:33:57.757802', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.814569', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.10300499945878983, 'timestamp': '2025-09-30 22:33:57.819333', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:57.877164', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.07127618044614792, 'timestamp': '2025-09-30 22:33:57.880194', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:57.938968', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.1281113475561142, 'timestamp': '2025-09-30 22:33:57.943924', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.003107', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.14392371475696564, 'timestamp': '2025-09-30 22:33:58.009226', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:58.066083', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.1449277400970459, 'timestamp': '2025-09-30 22:33:58.068677', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.128229', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.1139102503657341, 'timestamp': '2025-09-30 22:33:58.136028', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.199822', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.12084612995386124, 'timestamp': '2025-09-30 22:33:58.210918', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.269136', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.080173559486866, 'timestamp': '2025-09-30 22:33:58.275283', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:58.343952', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.12473055720329285, 'timestamp': '2025-09-30 22:33:58.354000', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:58.426209', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.11045019328594208, 'timestamp': '2025-09-30 22:33:58.429548', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:58.520175', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.08135409653186798, 'timestamp': '2025-09-30 22:33:58.523242', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.598086', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.12160833179950714, 'timestamp': '2025-09-30 22:33:58.606984', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:58.686939', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.1660456657409668, 'timestamp': '2025-09-30 22:33:58.693228', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:58.801811', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.1771026849746704, 'timestamp': '2025-09-30 22:33:58.807009', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:58.878314', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.17966611683368683, 'timestamp': '2025-09-30 22:33:58.882360', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:58.970773', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.1737702637910843, 'timestamp': '2025-09-30 22:33:58.977476', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.047220', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.09699319303035736, 'timestamp': '2025-09-30 22:33:59.051049', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:59.123373', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.06351008266210556, 'timestamp': '2025-09-30 22:33:59.126021', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.190336', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.07075590640306473, 'timestamp': '2025-09-30 22:33:59.192871', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:59.279193', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.11473239958286285, 'timestamp': '2025-09-30 22:33:59.286851', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.350365', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.11642573773860931, 'timestamp': '2025-09-30 22:33:59.353902', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.428784', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.1113656610250473, 'timestamp': '2025-09-30 22:33:59.439922', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:33:59.514142', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.11349112540483475, 'timestamp': '2025-09-30 22:33:59.519459', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:33:59.610572', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.11850645393133163, 'timestamp': '2025-09-30 22:33:59.616592', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:33:59.690199', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.11567021906375885, 'timestamp': '2025-09-30 22:33:59.693046', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:59.772441', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.08500798791646957, 'timestamp': '2025-09-30 22:33:59.774877', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.832672', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.1564241498708725, 'timestamp': '2025-09-30 22:33:59.840018', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:33:59.897247', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.07977720350027084, 'timestamp': '2025-09-30 22:33:59.904408', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:33:59.961138', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.12166037410497665, 'timestamp': '2025-09-30 22:33:59.964454', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:00.024333', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.06532789766788483, 'timestamp': '2025-09-30 22:34:00.037425', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:00.093653', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.13811266422271729, 'timestamp': '2025-09-30 22:34:00.098618', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:00.168755', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.1633339524269104, 'timestamp': '2025-09-30 22:34:00.175818', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.237005', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.154764786362648, 'timestamp': '2025-09-30 22:34:00.243494', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.303217', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.07835780829191208, 'timestamp': '2025-09-30 22:34:00.308534', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:00.379559', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.09971413761377335, 'timestamp': '2025-09-30 22:34:00.382834', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.440535', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.1793884038925171, 'timestamp': '2025-09-30 22:34:00.454042', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.526263', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.10232260823249817, 'timestamp': '2025-09-30 22:34:00.528959', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.597510', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.07177440822124481, 'timestamp': '2025-09-30 22:34:00.602608', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:00.663969', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.12075731158256531, 'timestamp': '2025-09-30 22:34:00.667328', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.724448', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.060592733323574066, 'timestamp': '2025-09-30 22:34:00.743290', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:00.800858', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.10412751883268356, 'timestamp': '2025-09-30 22:34:00.803624', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:00.876224', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.12448098510503769, 'timestamp': '2025-09-30 22:34:00.880455', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:00.939110', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.1312258392572403, 'timestamp': '2025-09-30 22:34:00.941982', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:01.002221', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.09499531984329224, 'timestamp': '2025-09-30 22:34:01.008597', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.065075', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.0845128670334816, 'timestamp': '2025-09-30 22:34:01.067978', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.125954', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.11582500487565994, 'timestamp': '2025-09-30 22:34:01.129351', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.211070', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.1504981517791748, 'timestamp': '2025-09-30 22:34:01.213729', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.272145', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.12128928303718567, 'timestamp': '2025-09-30 22:34:01.278969', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.360528', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.14337432384490967, 'timestamp': '2025-09-30 22:34:01.363170', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:01.419979', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.1260869950056076, 'timestamp': '2025-09-30 22:34:01.422754', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:01.479478', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.12862300872802734, 'timestamp': '2025-09-30 22:34:01.482663', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.539683', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.1010388508439064, 'timestamp': '2025-09-30 22:34:01.546143', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:01.603629', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.13799422979354858, 'timestamp': '2025-09-30 22:34:01.610375', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:01.673879', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.09553281217813492, 'timestamp': '2025-09-30 22:34:01.679775', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:01.738023', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.11091749370098114, 'timestamp': '2025-09-30 22:34:01.740781', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:01.796920', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.09374504536390305, 'timestamp': '2025-09-30 22:34:01.804871', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:01.865780', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.11095987260341644, 'timestamp': '2025-09-30 22:34:01.873276', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:01.942958', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.10542428493499756, 'timestamp': '2025-09-30 22:34:01.947239', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.005275', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.09127267450094223, 'timestamp': '2025-09-30 22:34:02.010877', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:02.069431', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.11589904129505157, 'timestamp': '2025-09-30 22:34:02.075717', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:02.131945', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.1610560119152069, 'timestamp': '2025-09-30 22:34:02.134404', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.192505', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.11065560579299927, 'timestamp': '2025-09-30 22:34:02.197729', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:02.258530', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.09428881853818893, 'timestamp': '2025-09-30 22:34:02.261941', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:02.320298', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.10597385466098785, 'timestamp': '2025-09-30 22:34:02.330413', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.392899', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.1311035007238388, 'timestamp': '2025-09-30 22:34:02.397684', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:02.473270', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.14254239201545715, 'timestamp': '2025-09-30 22:34:02.485379', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.546285', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.131804421544075, 'timestamp': '2025-09-30 22:34:02.549434', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:02.621262', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.20848317444324493, 'timestamp': '2025-09-30 22:34:02.628136', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:02.684188', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.15493440628051758, 'timestamp': '2025-09-30 22:34:02.691528', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:02.765263', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.1580716073513031, 'timestamp': '2025-09-30 22:34:02.768505', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.827422', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.1610691398382187, 'timestamp': '2025-09-30 22:34:02.832393', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:02.888716', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.14451487362384796, 'timestamp': '2025-09-30 22:34:02.895098', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:02.951219', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.09497644752264023, 'timestamp': '2025-09-30 22:34:02.956330', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.015797', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.08103696256875992, 'timestamp': '2025-09-30 22:34:03.018807', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:03.081949', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.1252637505531311, 'timestamp': '2025-09-30 22:34:03.085611', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:03.147817', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.1292751282453537, 'timestamp': '2025-09-30 22:34:03.163466', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:03.222872', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.15558688342571259, 'timestamp': '2025-09-30 22:34:03.225435', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.291051', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.09048384428024292, 'timestamp': '2025-09-30 22:34:03.293639', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.352977', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.11547975987195969, 'timestamp': '2025-09-30 22:34:03.360510', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.420862', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.16898414492607117, 'timestamp': '2025-09-30 22:34:03.427136', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:03.482757', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.05889691412448883, 'timestamp': '2025-09-30 22:34:03.488426', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.552759', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.15784816443920135, 'timestamp': '2025-09-30 22:34:03.555063', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:03.614736', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.11108464002609253, 'timestamp': '2025-09-30 22:34:03.618882', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.676292', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.1711573302745819, 'timestamp': '2025-09-30 22:34:03.685893', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.741969', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.08531472086906433, 'timestamp': '2025-09-30 22:34:03.746280', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:03.803870', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.12348074465990067, 'timestamp': '2025-09-30 22:34:03.816070', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:03.874320', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.18937841057777405, 'timestamp': '2025-09-30 22:34:03.878456', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:03.937242', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.08582141250371933, 'timestamp': '2025-09-30 22:34:03.944899', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:34:18.329071', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 8919.503943146721, 'timestamp': '2025-09-30 22:34:18.347377', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:18.405173', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.07388582080602646, 'timestamp': '2025-09-30 22:34:18.408347', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:18.484427', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.11352252960205078, 'timestamp': '2025-09-30 22:34:18.488028', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:18.559965', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.10635818541049957, 'timestamp': '2025-09-30 22:34:18.563736', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:18.622208', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.1556461900472641, 'timestamp': '2025-09-30 22:34:18.628896', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:18.687156', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.09542598575353622, 'timestamp': '2025-09-30 22:34:18.691182', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:18.758447', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.06266207247972488, 'timestamp': '2025-09-30 22:34:18.762579', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:18.821497', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.16326232254505157, 'timestamp': '2025-09-30 22:34:18.825897', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:18.884566', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.08331190794706345, 'timestamp': '2025-09-30 22:34:18.895563', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:18.953975', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.12063152343034744, 'timestamp': '2025-09-30 22:34:18.956448', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.020067', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.24249309301376343, 'timestamp': '2025-09-30 22:34:19.024125', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:19.083519', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.12537634372711182, 'timestamp': '2025-09-30 22:34:19.091411', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:19.162845', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.08912370353937149, 'timestamp': '2025-09-30 22:34:19.169567', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:19.227477', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.09322581440210342, 'timestamp': '2025-09-30 22:34:19.230501', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.289607', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.11527345329523087, 'timestamp': '2025-09-30 22:34:19.299334', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:19.369020', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.04612619802355766, 'timestamp': '2025-09-30 22:34:19.371386', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.434603', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.09171903878450394, 'timestamp': '2025-09-30 22:34:19.444812', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:19.506156', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.037158265709877014, 'timestamp': '2025-09-30 22:34:19.508549', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.579155', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.13491013646125793, 'timestamp': '2025-09-30 22:34:19.582520', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.653411', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.13681434094905853, 'timestamp': '2025-09-30 22:34:19.663827', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:19.723366', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.11786411702632904, 'timestamp': '2025-09-30 22:34:19.733127', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-09-30 22:34:20.183013', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:20.243546', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.12589947879314423, 'timestamp': '2025-09-30 22:34:20.246896', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:20.305809', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.10290371626615524, 'timestamp': '2025-09-30 22:34:20.309574', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:20.374666', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.14697903394699097, 'timestamp': '2025-09-30 22:34:20.378164', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:20.439860', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.09802331775426865, 'timestamp': '2025-09-30 22:34:20.446372', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:20.502716', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.05477350577712059, 'timestamp': '2025-09-30 22:34:20.514046', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:20.573522', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.07631853967905045, 'timestamp': '2025-09-30 22:34:20.576186', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:20.634536', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.03168987110257149, 'timestamp': '2025-09-30 22:34:20.644302', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:20.711460', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.10768753290176392, 'timestamp': '2025-09-30 22:34:20.719381', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:20.786842', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.14605411887168884, 'timestamp': '2025-09-30 22:34:20.789556', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:20.850574', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.17893628776073456, 'timestamp': '2025-09-30 22:34:20.853518', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:20.910357', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.145331010222435, 'timestamp': '2025-09-30 22:34:20.919895', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:20.980023', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.10518606752157211, 'timestamp': '2025-09-30 22:34:20.987377', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:21.043660', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.14979830384254456, 'timestamp': '2025-09-30 22:34:21.047760', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:21.103579', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.02380361780524254, 'timestamp': '2025-09-30 22:34:21.107879', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.165567', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.10862722247838974, 'timestamp': '2025-09-30 22:34:21.169569', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:21.228647', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.20878304541110992, 'timestamp': '2025-09-30 22:34:21.235040', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:21.293800', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.08325470983982086, 'timestamp': '2025-09-30 22:34:21.298662', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:34:21.355398', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.10167207568883896, 'timestamp': '2025-09-30 22:34:21.360628', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:21.418881', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.17550161480903625, 'timestamp': '2025-09-30 22:34:21.423284', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.482674', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.08786869049072266, 'timestamp': '2025-09-30 22:34:21.488977', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.545666', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.1272277534008026, 'timestamp': '2025-09-30 22:34:21.548434', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.606983', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.13381971418857574, 'timestamp': '2025-09-30 22:34:21.610914', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.668637', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.17101597785949707, 'timestamp': '2025-09-30 22:34:21.672460', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:21.731541', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.16942834854125977, 'timestamp': '2025-09-30 22:34:21.738143', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:21.795375', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.07668238133192062, 'timestamp': '2025-09-30 22:34:21.798237', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.857766', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.10795699805021286, 'timestamp': '2025-09-30 22:34:21.864167', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.923230', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.09906373918056488, 'timestamp': '2025-09-30 22:34:21.926228', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:21.985507', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.10593584924936295, 'timestamp': '2025-09-30 22:34:21.992549', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:22.051320', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.12681779265403748, 'timestamp': '2025-09-30 22:34:22.054995', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:22.122929', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.1812300682067871, 'timestamp': '2025-09-30 22:34:22.128593', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:22.188678', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.11831147968769073, 'timestamp': '2025-09-30 22:34:22.191842', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:22.250739', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.10959360748529434, 'timestamp': '2025-09-30 22:34:22.258200', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:22.321107', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.06628715246915817, 'timestamp': '2025-09-30 22:34:22.324850', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:22.382005', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.1084827184677124, 'timestamp': '2025-09-30 22:34:22.384032', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:22.442478', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.15053732693195343, 'timestamp': '2025-09-30 22:34:22.445561', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:22.502911', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.10747599601745605, 'timestamp': '2025-09-30 22:34:22.510226', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:22.566365', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.06931799650192261, 'timestamp': '2025-09-30 22:34:22.569182', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:22.626833', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.10713225603103638, 'timestamp': '2025-09-30 22:34:22.629268', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:22.686343', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.10211023688316345, 'timestamp': '2025-09-30 22:34:22.688564', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:22.746587', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.14939655363559723, 'timestamp': '2025-09-30 22:34:22.752701', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:22.808408', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.1598782241344452, 'timestamp': '2025-09-30 22:34:22.810733', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:22.867564', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.07708597183227539, 'timestamp': '2025-09-30 22:34:22.871459', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:22.935978', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.07168390601873398, 'timestamp': '2025-09-30 22:34:22.940206', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:22.998282', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.15211547911167145, 'timestamp': '2025-09-30 22:34:23.004045', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:23.061546', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.20946872234344482, 'timestamp': '2025-09-30 22:34:23.067579', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:23.125206', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.04354790970683098, 'timestamp': '2025-09-30 22:34:23.127592', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:23.190714', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.10611286014318466, 'timestamp': '2025-09-30 22:34:23.195627', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:23.255450', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.13469409942626953, 'timestamp': '2025-09-30 22:34:23.264547', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:23.331455', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.13018806278705597, 'timestamp': '2025-09-30 22:34:23.334765', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:23.393461', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.1093141958117485, 'timestamp': '2025-09-30 22:34:23.395884', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:23.458520', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.08527234196662903, 'timestamp': '2025-09-30 22:34:23.462321', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:23.523528', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.1865694373846054, 'timestamp': '2025-09-30 22:34:23.532322', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:23.592121', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.0787665918469429, 'timestamp': '2025-09-30 22:34:23.597838', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:23.658325', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.15739692747592926, 'timestamp': '2025-09-30 22:34:23.660749', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:23.731303', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.08520174026489258, 'timestamp': '2025-09-30 22:34:23.733946', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:23.794451', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.09677048027515411, 'timestamp': '2025-09-30 22:34:23.802698', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:23.861825', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.13813185691833496, 'timestamp': '2025-09-30 22:34:23.866657', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:23.940229', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.03589543327689171, 'timestamp': '2025-09-30 22:34:23.943060', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:24.012776', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.03264021500945091, 'timestamp': '2025-09-30 22:34:24.017264', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:24.074082', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.08195212483406067, 'timestamp': '2025-09-30 22:34:24.083199', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:24.143762', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.07731286436319351, 'timestamp': '2025-09-30 22:34:24.147741', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.206883', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.1120447963476181, 'timestamp': '2025-09-30 22:34:24.209727', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:24.272974', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.16977320611476898, 'timestamp': '2025-09-30 22:34:24.275544', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.332205', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.21721097826957703, 'timestamp': '2025-09-30 22:34:24.343065', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:24.399799', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.04656320437788963, 'timestamp': '2025-09-30 22:34:24.403023', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.472277', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.09066709876060486, 'timestamp': '2025-09-30 22:34:24.477854', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:24.539501', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.07747060060501099, 'timestamp': '2025-09-30 22:34:24.542215', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.600125', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.10349971055984497, 'timestamp': '2025-09-30 22:34:24.607518', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:24.665055', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.15161988139152527, 'timestamp': '2025-09-30 22:34:24.667629', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:24.724686', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.16875672340393066, 'timestamp': '2025-09-30 22:34:24.727092', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:24.783677', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.12224198877811432, 'timestamp': '2025-09-30 22:34:24.786058', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.844693', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.10109644383192062, 'timestamp': '2025-09-30 22:34:24.850785', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.907853', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.12945882976055145, 'timestamp': '2025-09-30 22:34:24.912029', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:24.975341', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.11342152208089828, 'timestamp': '2025-09-30 22:34:24.977788', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.037936', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.06583517044782639, 'timestamp': '2025-09-30 22:34:25.042179', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.102681', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.16495585441589355, 'timestamp': '2025-09-30 22:34:25.109817', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:25.184402', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.08068032562732697, 'timestamp': '2025-09-30 22:34:25.191127', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:25.261160', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.04751690477132797, 'timestamp': '2025-09-30 22:34:25.264019', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.322182', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.07279696315526962, 'timestamp': '2025-09-30 22:34:25.325354', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.387159', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.08139927685260773, 'timestamp': '2025-09-30 22:34:25.396323', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:25.452686', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.0946345254778862, 'timestamp': '2025-09-30 22:34:25.462397', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:25.538610', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.1059601679444313, 'timestamp': '2025-09-30 22:34:25.541586', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.609283', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.14080707728862762, 'timestamp': '2025-09-30 22:34:25.620230', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.688680', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.08705879747867584, 'timestamp': '2025-09-30 22:34:25.705883', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:25.763498', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.15483137965202332, 'timestamp': '2025-09-30 22:34:25.766766', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:25.834572', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.11526872962713242, 'timestamp': '2025-09-30 22:34:25.838748', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:25.896131', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.050992436707019806, 'timestamp': '2025-09-30 22:34:25.898353', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:25.960168', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.11123087257146835, 'timestamp': '2025-09-30 22:34:25.966793', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.024752', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.19953308999538422, 'timestamp': '2025-09-30 22:34:26.030947', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.101060', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.10371243953704834, 'timestamp': '2025-09-30 22:34:26.103290', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:26.161362', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.13999341428279877, 'timestamp': '2025-09-30 22:34:26.165888', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.228393', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.07913937419652939, 'timestamp': '2025-09-30 22:34:26.238673', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:26.303805', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.07974329590797424, 'timestamp': '2025-09-30 22:34:26.306394', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:26.365511', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.09361421316862106, 'timestamp': '2025-09-30 22:34:26.374085', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.439431', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.18155129253864288, 'timestamp': '2025-09-30 22:34:26.447522', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.508217', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.09269575029611588, 'timestamp': '2025-09-30 22:34:26.515933', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:26.581707', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.024349989369511604, 'timestamp': '2025-09-30 22:34:26.586155', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:26.646109', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.11337673664093018, 'timestamp': '2025-09-30 22:34:26.654004', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:26.712686', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.11818568408489227, 'timestamp': '2025-09-30 22:34:26.715592', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:26.773174', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.13883747160434723, 'timestamp': '2025-09-30 22:34:26.779894', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:26.837302', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.16978855431079865, 'timestamp': '2025-09-30 22:34:26.844387', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:26.909786', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.06961365044116974, 'timestamp': '2025-09-30 22:34:26.913226', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:34:26.976273', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.17752699553966522, 'timestamp': '2025-09-30 22:34:26.980252', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:27.049658', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.14215996861457825, 'timestamp': '2025-09-30 22:34:27.057656', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:27.117941', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.0656614601612091, 'timestamp': '2025-09-30 22:34:27.123919', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:27.187544', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.08804141730070114, 'timestamp': '2025-09-30 22:34:27.190683', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:27.250769', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.04317444562911987, 'timestamp': '2025-09-30 22:34:27.256123', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:27.316916', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.07430371642112732, 'timestamp': '2025-09-30 22:34:27.322897', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:27.390119', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.18437646329402924, 'timestamp': '2025-09-30 22:34:27.392797', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:27.450453', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.1227850615978241, 'timestamp': '2025-09-30 22:34:27.453399', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:27.514933', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.06432674080133438, 'timestamp': '2025-09-30 22:34:27.517580', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:27.574928', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.13541123270988464, 'timestamp': '2025-09-30 22:34:27.587124', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:27.648384', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.1209457516670227, 'timestamp': '2025-09-30 22:34:27.652994', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:27.710532', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.0787544697523117, 'timestamp': '2025-09-30 22:34:27.715371', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:27.774668', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.16507498919963837, 'timestamp': '2025-09-30 22:34:27.785931', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:27.844773', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.11992469429969788, 'timestamp': '2025-09-30 22:34:27.851113', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:27.909183', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.08684197813272476, 'timestamp': '2025-09-30 22:34:27.912629', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:27.973870', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.17843855917453766, 'timestamp': '2025-09-30 22:34:27.977901', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:28.053692', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.06712716817855835, 'timestamp': '2025-09-30 22:34:28.060408', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:28.117933', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.20528052747249603, 'timestamp': '2025-09-30 22:34:28.125594', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:28.185389', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.04007734730839729, 'timestamp': '2025-09-30 22:34:28.189108', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:28.247342', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.07470542192459106, 'timestamp': '2025-09-30 22:34:28.249543', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:28.320519', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.07139100879430771, 'timestamp': '2025-09-30 22:34:28.322904', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:28.379866', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.11083965003490448, 'timestamp': '2025-09-30 22:34:28.388797', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:28.447325', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.07928222417831421, 'timestamp': '2025-09-30 22:34:28.451032', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:28.507276', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.05871595814824104, 'timestamp': '2025-09-30 22:34:28.515618', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:28.572637', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.27232733368873596, 'timestamp': '2025-09-30 22:34:28.577332', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:28.638798', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.12144029140472412, 'timestamp': '2025-09-30 22:34:28.651945', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:28.709296', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.11006669700145721, 'timestamp': '2025-09-30 22:34:28.714999', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:28.774035', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.09941542148590088, 'timestamp': '2025-09-30 22:34:28.780040', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:28.840261', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.010181618854403496, 'timestamp': '2025-09-30 22:34:28.842955', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:28.901523', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.09734860807657242, 'timestamp': '2025-09-30 22:34:28.908193', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:28.965432', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.13532963395118713, 'timestamp': '2025-09-30 22:34:28.968204', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:29.026065', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.1822023093700409, 'timestamp': '2025-09-30 22:34:29.034414', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:29.091910', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.18452975153923035, 'timestamp': '2025-09-30 22:34:29.098822', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:29.156619', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.0942821279168129, 'timestamp': '2025-09-30 22:34:29.162894', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:29.221856', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.07755018025636673, 'timestamp': '2025-09-30 22:34:29.225154', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.282583', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.11928229033946991, 'timestamp': '2025-09-30 22:34:29.285301', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.346378', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.16142703592777252, 'timestamp': '2025-09-30 22:34:29.349524', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:29.407656', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.10852029174566269, 'timestamp': '2025-09-30 22:34:29.414657', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:29.473760', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.07446461915969849, 'timestamp': '2025-09-30 22:34:29.476806', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.535361', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.09837484359741211, 'timestamp': '2025-09-30 22:34:29.540879', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:29.598696', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.22126150131225586, 'timestamp': '2025-09-30 22:34:29.601499', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:29.661427', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.16612686216831207, 'timestamp': '2025-09-30 22:34:29.673561', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:29.740701', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.15822690725326538, 'timestamp': '2025-09-30 22:34:29.751077', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.808621', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.06548123061656952, 'timestamp': '2025-09-30 22:34:29.811733', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.875499', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.0948595181107521, 'timestamp': '2025-09-30 22:34:29.878220', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:29.935154', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.12745904922485352, 'timestamp': '2025-09-30 22:34:29.942157', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:30.003532', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.19745638966560364, 'timestamp': '2025-09-30 22:34:30.006509', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:30.071881', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.1634901463985443, 'timestamp': '2025-09-30 22:34:30.075876', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:30.134277', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.12544579803943634, 'timestamp': '2025-09-30 22:34:30.138883', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:30.201179', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.09712263196706772, 'timestamp': '2025-09-30 22:34:30.208339', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:30.281684', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.16444827616214752, 'timestamp': '2025-09-30 22:34:30.286501', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:30.355066', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.08955149352550507, 'timestamp': '2025-09-30 22:34:30.365650', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:30.471239', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.13206452131271362, 'timestamp': '2025-09-30 22:34:30.476323', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:30.569884', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.10221000760793686, 'timestamp': '2025-09-30 22:34:30.576717', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:30.681344', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.14800934493541718, 'timestamp': '2025-09-30 22:34:30.684244', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:30.769474', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.1505502611398697, 'timestamp': '2025-09-30 22:34:30.772086', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:30.867068', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.1312379390001297, 'timestamp': '2025-09-30 22:34:30.871573', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:30.986635', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.08664397895336151, 'timestamp': '2025-09-30 22:34:30.993609', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:31.076573', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.13092869520187378, 'timestamp': '2025-09-30 22:34:31.078699', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:31.160844', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.137481689453125, 'timestamp': '2025-09-30 22:34:31.163908', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:31.262019', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.12063233554363251, 'timestamp': '2025-09-30 22:34:31.265580', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:31.346559', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.1084471270442009, 'timestamp': '2025-09-30 22:34:31.354748', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:31.442064', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.17055991291999817, 'timestamp': '2025-09-30 22:34:31.444931', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:34:31.532070', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.13195233047008514, 'timestamp': '2025-09-30 22:34:31.535692', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:31.633937', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.1405172497034073, 'timestamp': '2025-09-30 22:34:31.637183', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:31.727459', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.13374529778957367, 'timestamp': '2025-09-30 22:34:31.736732', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:31.838290', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.048001695424318314, 'timestamp': '2025-09-30 22:34:31.841165', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:31.903548', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.15828712284564972, 'timestamp': '2025-09-30 22:34:31.906575', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:31.968976', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.19741350412368774, 'timestamp': '2025-09-30 22:34:31.971177', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:32.029083', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.20989368855953217, 'timestamp': '2025-09-30 22:34:32.035004', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:32.098016', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.08909449726343155, 'timestamp': '2025-09-30 22:34:32.102226', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:32.160240', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.12818710505962372, 'timestamp': '2025-09-30 22:34:32.163272', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:32.221033', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.08265799283981323, 'timestamp': '2025-09-30 22:34:32.224483', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:32.281844', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.15332387387752533, 'timestamp': '2025-09-30 22:34:32.297075', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:34:32.354882', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.15819400548934937, 'timestamp': '2025-09-30 22:34:32.358447', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:32.414968', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.0917355865240097, 'timestamp': '2025-09-30 22:34:32.425357', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:32.485011', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.145425945520401, 'timestamp': '2025-09-30 22:34:32.489222', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:32.548970', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.10752160102128983, 'timestamp': '2025-09-30 22:34:32.555328', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:32.615410', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.10046326369047165, 'timestamp': '2025-09-30 22:34:32.617769', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:32.675665', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.21904344856739044, 'timestamp': '2025-09-30 22:34:32.677932', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:32.736515', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.155919149518013, 'timestamp': '2025-09-30 22:34:32.740263', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:32.799873', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.08246628940105438, 'timestamp': '2025-09-30 22:34:32.806542', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:32.865889', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.06629276275634766, 'timestamp': '2025-09-30 22:34:32.869053', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:32.926735', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.07522236555814743, 'timestamp': '2025-09-30 22:34:32.930861', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:32.990520', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.08300177752971649, 'timestamp': '2025-09-30 22:34:32.993333', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:33.051833', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.054840873926877975, 'timestamp': '2025-09-30 22:34:33.057724', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:33.114466', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.14246824383735657, 'timestamp': '2025-09-30 22:34:33.116931', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:33.176458', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.1979203075170517, 'timestamp': '2025-09-30 22:34:33.179019', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:33.239651', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.18478451669216156, 'timestamp': '2025-09-30 22:34:33.242234', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:33.305740', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.16236993670463562, 'timestamp': '2025-09-30 22:34:33.315539', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:33.373597', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.0956127867102623, 'timestamp': '2025-09-30 22:34:33.376222', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.433672', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.12449369579553604, 'timestamp': '2025-09-30 22:34:33.438067', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.495603', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.037813249975442886, 'timestamp': '2025-09-30 22:34:33.498691', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.556894', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.1141357272863388, 'timestamp': '2025-09-30 22:34:33.562862', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.620239', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.12713661789894104, 'timestamp': '2025-09-30 22:34:33.623649', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.687078', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.1270648092031479, 'timestamp': '2025-09-30 22:34:33.690404', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:33.748583', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.12098057568073273, 'timestamp': '2025-09-30 22:34:33.751216', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:33.809423', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.09872019290924072, 'timestamp': '2025-09-30 22:34:33.816344', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:33.872901', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.13639380037784576, 'timestamp': '2025-09-30 22:34:33.876014', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:33.933511', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.0901997908949852, 'timestamp': '2025-09-30 22:34:33.936133', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:33.993144', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.10311262309551239, 'timestamp': '2025-09-30 22:34:33.995645', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.053204', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.10515002906322479, 'timestamp': '2025-09-30 22:34:34.066618', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.123404', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.0934387817978859, 'timestamp': '2025-09-30 22:34:34.125952', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.182460', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.09640302509069443, 'timestamp': '2025-09-30 22:34:34.185318', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.242766', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.15346011519432068, 'timestamp': '2025-09-30 22:34:34.246200', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.304526', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.13623200356960297, 'timestamp': '2025-09-30 22:34:34.310743', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.369740', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.08488354086875916, 'timestamp': '2025-09-30 22:34:34.372940', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.430909', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.18580910563468933, 'timestamp': '2025-09-30 22:34:34.433560', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.506105', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.07853361964225769, 'timestamp': '2025-09-30 22:34:34.508641', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.567319', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.15358293056488037, 'timestamp': '2025-09-30 22:34:34.576729', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.632729', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.15538065135478973, 'timestamp': '2025-09-30 22:34:34.637540', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:34.694234', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.08338012546300888, 'timestamp': '2025-09-30 22:34:34.703743', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:34.763336', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.16376888751983643, 'timestamp': '2025-09-30 22:34:34.771002', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:34.829688', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.13727280497550964, 'timestamp': '2025-09-30 22:34:34.835903', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:34.900991', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.09762739390134811, 'timestamp': '2025-09-30 22:34:34.903767', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:34.960855', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.03676468878984451, 'timestamp': '2025-09-30 22:34:34.963479', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:35.021822', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.09479620307683945, 'timestamp': '2025-09-30 22:34:35.025662', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:35.092554', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.07450920343399048, 'timestamp': '2025-09-30 22:34:35.098786', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.160863', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.15693601965904236, 'timestamp': '2025-09-30 22:34:35.163456', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:35.226443', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.13469703495502472, 'timestamp': '2025-09-30 22:34:35.230389', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:35.287902', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.07783951610326767, 'timestamp': '2025-09-30 22:34:35.290170', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.349138', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.06078535318374634, 'timestamp': '2025-09-30 22:34:35.355232', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.416614', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.15263274312019348, 'timestamp': '2025-09-30 22:34:35.419171', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:35.475890', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.15469983220100403, 'timestamp': '2025-09-30 22:34:35.483299', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.555295', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.13504402339458466, 'timestamp': '2025-09-30 22:34:35.558631', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.617337', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.17772434651851654, 'timestamp': '2025-09-30 22:34:35.625214', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.680861', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.0282032061368227, 'timestamp': '2025-09-30 22:34:35.684060', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:35.742219', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.1889817714691162, 'timestamp': '2025-09-30 22:34:35.746345', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:35.806373', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.08007842302322388, 'timestamp': '2025-09-30 22:34:35.809879', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:35.868267', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.1702105551958084, 'timestamp': '2025-09-30 22:34:35.875668', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:35.933313', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.066926509141922, 'timestamp': '2025-09-30 22:34:35.936803', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.000613', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.1701635867357254, 'timestamp': '2025-09-30 22:34:36.004368', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.063211', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.036682918667793274, 'timestamp': '2025-09-30 22:34:36.068240', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.139270', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.08560764044523239, 'timestamp': '2025-09-30 22:34:36.146807', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:36.205140', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.07944212853908539, 'timestamp': '2025-09-30 22:34:36.211458', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:36.271365', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.15219935774803162, 'timestamp': '2025-09-30 22:34:36.274928', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.335508', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.09661654382944107, 'timestamp': '2025-09-30 22:34:36.339250', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:36.399770', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.15690888464450836, 'timestamp': '2025-09-30 22:34:36.405873', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:36.465225', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.044390659779310226, 'timestamp': '2025-09-30 22:34:36.469536', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:36.531264', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.07243133336305618, 'timestamp': '2025-09-30 22:34:36.535472', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.594601', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.20505161583423615, 'timestamp': '2025-09-30 22:34:36.603051', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:36.663036', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.125435471534729, 'timestamp': '2025-09-30 22:34:36.670011', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:36.735504', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.14238594472408295, 'timestamp': '2025-09-30 22:34:36.739975', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:36.797514', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.08218947798013687, 'timestamp': '2025-09-30 22:34:36.802886', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:36.861515', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.2051357924938202, 'timestamp': '2025-09-30 22:34:36.865862', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:36.926231', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.0777304396033287, 'timestamp': '2025-09-30 22:34:36.932891', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:36.993251', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.1091790571808815, 'timestamp': '2025-09-30 22:34:36.995746', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.053213', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.14286769926548004, 'timestamp': '2025-09-30 22:34:37.056732', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.115521', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.11009247601032257, 'timestamp': '2025-09-30 22:34:37.117664', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:37.178330', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.11303714662790298, 'timestamp': '2025-09-30 22:34:37.184496', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:37.240712', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.08441701531410217, 'timestamp': '2025-09-30 22:34:37.244773', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:37.303133', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.1092141717672348, 'timestamp': '2025-09-30 22:34:37.305389', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:37.364624', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.06338769942522049, 'timestamp': '2025-09-30 22:34:37.367211', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.425277', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.10568936914205551, 'timestamp': '2025-09-30 22:34:37.431860', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:37.489256', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.14316464960575104, 'timestamp': '2025-09-30 22:34:37.492294', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.549094', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.15510016679763794, 'timestamp': '2025-09-30 22:34:37.562524', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.620998', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.101322703063488, 'timestamp': '2025-09-30 22:34:37.623463', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:37.681600', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.19637738168239594, 'timestamp': '2025-09-30 22:34:37.687833', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.744851', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.14541427791118622, 'timestamp': '2025-09-30 22:34:37.747965', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:37.804739', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.18545569479465485, 'timestamp': '2025-09-30 22:34:37.807065', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.865153', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.17221643030643463, 'timestamp': '2025-09-30 22:34:37.867367', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:37.925204', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.08123606443405151, 'timestamp': '2025-09-30 22:34:37.932322', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:38.002656', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.1544525921344757, 'timestamp': '2025-09-30 22:34:38.005590', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:38.064433', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.09161049872636795, 'timestamp': '2025-09-30 22:34:38.067580', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.126505', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.08431579172611237, 'timestamp': '2025-09-30 22:34:38.129622', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:38.187754', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.12251090258359909, 'timestamp': '2025-09-30 22:34:38.194379', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:38.249617', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.13394595682621002, 'timestamp': '2025-09-30 22:34:38.251911', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:38.308837', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.12918581068515778, 'timestamp': '2025-09-30 22:34:38.311226', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:38.369913', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.0824035033583641, 'timestamp': '2025-09-30 22:34:38.372259', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:38.436997', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.1361553817987442, 'timestamp': '2025-09-30 22:34:38.443294', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.504956', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.07536225765943527, 'timestamp': '2025-09-30 22:34:38.512050', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.569665', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.0732422024011612, 'timestamp': '2025-09-30 22:34:38.573854', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.635642', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.10519109666347504, 'timestamp': '2025-09-30 22:34:38.643367', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.700468', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.06758339703083038, 'timestamp': '2025-09-30 22:34:38.706475', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.764668', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.09568601101636887, 'timestamp': '2025-09-30 22:34:38.766921', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.823966', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.1266060322523117, 'timestamp': '2025-09-30 22:34:38.828943', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:38.887422', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.14583076536655426, 'timestamp': '2025-09-30 22:34:38.891289', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:38.949313', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.06196209788322449, 'timestamp': '2025-09-30 22:34:38.957583', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:39.016389', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.11493527889251709, 'timestamp': '2025-09-30 22:34:39.019398', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:39.076291', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.07459288090467453, 'timestamp': '2025-09-30 22:34:39.081798', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:39.139782', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.09022010117769241, 'timestamp': '2025-09-30 22:34:39.142506', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:39.199622', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.14977973699569702, 'timestamp': '2025-09-30 22:34:39.206297', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:39.262986', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.09798610210418701, 'timestamp': '2025-09-30 22:34:39.267125', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:39.323868', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.16805385053157806, 'timestamp': '2025-09-30 22:34:39.326076', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:39.385854', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.19597773253917694, 'timestamp': '2025-09-30 22:34:39.388882', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:39.449814', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.06929796934127808, 'timestamp': '2025-09-30 22:34:39.457117', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:39.513511', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.12237513810396194, 'timestamp': '2025-09-30 22:34:39.518212', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:39.575811', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.08658682554960251, 'timestamp': '2025-09-30 22:34:39.583464', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:39.649629', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.13164904713630676, 'timestamp': '2025-09-30 22:34:39.654974', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:39.711975', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.0698104128241539, 'timestamp': '2025-09-30 22:34:39.719911', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:39.775646', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.18484137952327728, 'timestamp': '2025-09-30 22:34:39.778477', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:39.838869', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.12413080781698227, 'timestamp': '2025-09-30 22:34:39.844645', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:39.901136', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.09489939361810684, 'timestamp': '2025-09-30 22:34:39.903275', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:39.960010', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.09362144768238068, 'timestamp': '2025-09-30 22:34:39.966466', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:40.022928', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.16208671033382416, 'timestamp': '2025-09-30 22:34:40.025168', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:40.085711', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.10897661745548248, 'timestamp': '2025-09-30 22:34:40.087908', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:40.144462', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.15929089486598969, 'timestamp': '2025-09-30 22:34:40.147612', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:40.204379', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.07372993975877762, 'timestamp': '2025-09-30 22:34:40.211044', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:40.267228', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.11996859312057495, 'timestamp': '2025-09-30 22:34:40.269940', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.330660', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.10964251309633255, 'timestamp': '2025-09-30 22:34:40.333330', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:40.390357', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.12209661304950714, 'timestamp': '2025-09-30 22:34:40.394682', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.472603', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.0931398794054985, 'timestamp': '2025-09-30 22:34:40.478766', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:40.538139', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.1265094131231308, 'timestamp': '2025-09-30 22:34:40.542855', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:40.602664', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.06423836946487427, 'timestamp': '2025-09-30 22:34:40.606019', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:40.663081', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.07242733240127563, 'timestamp': '2025-09-30 22:34:40.667471', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.726936', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.1138700470328331, 'timestamp': '2025-09-30 22:34:40.732920', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.790783', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.13059043884277344, 'timestamp': '2025-09-30 22:34:40.793241', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:40.851158', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.06449061632156372, 'timestamp': '2025-09-30 22:34:40.854632', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.911617', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.0629701241850853, 'timestamp': '2025-09-30 22:34:40.913915', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:40.972585', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.0573866069316864, 'timestamp': '2025-09-30 22:34:40.986518', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.043440', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.1038285568356514, 'timestamp': '2025-09-30 22:34:41.047733', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:41.106518', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.13420255482196808, 'timestamp': '2025-09-30 22:34:41.109293', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.166191', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.14726118743419647, 'timestamp': '2025-09-30 22:34:41.170516', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:41.229784', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.13051071763038635, 'timestamp': '2025-09-30 22:34:41.244276', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:41.302312', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.0352449044585228, 'timestamp': '2025-09-30 22:34:41.305348', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:41.363419', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.08904781192541122, 'timestamp': '2025-09-30 22:34:41.372304', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:41.429556', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.09683234989643097, 'timestamp': '2025-09-30 22:34:41.441074', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.499227', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.09654289484024048, 'timestamp': '2025-09-30 22:34:41.505864', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:41.563130', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.12292779237031937, 'timestamp': '2025-09-30 22:34:41.571177', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:41.633404', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.10012195259332657, 'timestamp': '2025-09-30 22:34:41.635969', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:41.697570', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.2120644897222519, 'timestamp': '2025-09-30 22:34:41.701567', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:41.759743', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.14965659379959106, 'timestamp': '2025-09-30 22:34:41.766004', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.822829', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.08039561659097672, 'timestamp': '2025-09-30 22:34:41.828510', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.889253', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.0874660462141037, 'timestamp': '2025-09-30 22:34:41.896211', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:41.954259', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.11016573011875153, 'timestamp': '2025-09-30 22:34:41.957040', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.022864', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.09074881672859192, 'timestamp': '2025-09-30 22:34:42.032483', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.090556', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.11620452255010605, 'timestamp': '2025-09-30 22:34:42.097636', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.156090', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.10024087876081467, 'timestamp': '2025-09-30 22:34:42.159133', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:42.216536', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.14136630296707153, 'timestamp': '2025-09-30 22:34:42.220268', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:42.279155', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.13298572599887848, 'timestamp': '2025-09-30 22:34:42.286170', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.352520', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.11192723363637924, 'timestamp': '2025-09-30 22:34:42.356891', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:42.427169', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.13506266474723816, 'timestamp': '2025-09-30 22:34:42.432250', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:42.493621', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.1773533821105957, 'timestamp': '2025-09-30 22:34:42.496112', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:42.553073', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.11147821694612503, 'timestamp': '2025-09-30 22:34:42.558961', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:42.617769', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.12777285277843475, 'timestamp': '2025-09-30 22:34:42.621916', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:42.681557', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.11802516132593155, 'timestamp': '2025-09-30 22:34:42.684533', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.743064', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.08362556248903275, 'timestamp': '2025-09-30 22:34:42.745546', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:42.805281', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.09166036546230316, 'timestamp': '2025-09-30 22:34:42.811477', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:42.870425', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.08921992033720016, 'timestamp': '2025-09-30 22:34:42.875380', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:42.931367', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.23139147460460663, 'timestamp': '2025-09-30 22:34:42.940734', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:42.999307', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.11052872985601425, 'timestamp': '2025-09-30 22:34:43.001750', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:43.061146', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.1373252123594284, 'timestamp': '2025-09-30 22:34:43.070742', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:43.127312', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.10257970541715622, 'timestamp': '2025-09-30 22:34:43.129674', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:43.189769', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.08623018860816956, 'timestamp': '2025-09-30 22:34:43.192144', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:43.249748', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.08981567621231079, 'timestamp': '2025-09-30 22:34:43.251807', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:34:43.309425', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.10409727692604065, 'timestamp': '2025-09-30 22:34:43.315449', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:43.374881', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.0901629775762558, 'timestamp': '2025-09-30 22:34:43.377307', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:43.434663', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.07512932270765305, 'timestamp': '2025-09-30 22:34:43.436946', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:43.511215', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.14403943717479706, 'timestamp': '2025-09-30 22:34:43.514026', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:43.581705', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.1029842346906662, 'timestamp': '2025-09-30 22:34:43.587776', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:43.644756', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.1962640881538391, 'timestamp': '2025-09-30 22:34:43.649294', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:43.716040', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.09584252536296844, 'timestamp': '2025-09-30 22:34:43.724053', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:43.781699', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.11102191358804703, 'timestamp': '2025-09-30 22:34:43.784413', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:43.842551', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.12458769977092743, 'timestamp': '2025-09-30 22:34:43.849252', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:43.909258', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.10900264978408813, 'timestamp': '2025-09-30 22:34:43.911775', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:43.968747', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.09976279735565186, 'timestamp': '2025-09-30 22:34:43.971857', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.031322', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.16119928658008575, 'timestamp': '2025-09-30 22:34:44.034482', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:44.093110', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.09860183298587799, 'timestamp': '2025-09-30 22:34:44.100498', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:44.158953', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.11664231866598129, 'timestamp': '2025-09-30 22:34:44.163903', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.225970', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.21494589745998383, 'timestamp': '2025-09-30 22:34:44.230242', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:44.291643', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.16763898730278015, 'timestamp': '2025-09-30 22:34:44.294040', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.351446', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.07769721746444702, 'timestamp': '2025-09-30 22:34:44.359080', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:44.416939', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.1655505895614624, 'timestamp': '2025-09-30 22:34:44.419737', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.478668', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.16858690977096558, 'timestamp': '2025-09-30 22:34:44.483477', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:34:44.541750', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.12110383808612823, 'timestamp': '2025-09-30 22:34:44.545391', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:44.603424', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.05699564889073372, 'timestamp': '2025-09-30 22:34:44.609776', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:44.669478', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.10637089610099792, 'timestamp': '2025-09-30 22:34:44.671635', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.730120', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.18461720645427704, 'timestamp': '2025-09-30 22:34:44.732221', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:44.790426', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.07800474017858505, 'timestamp': '2025-09-30 22:34:44.794284', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:44.852691', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.10549743473529816, 'timestamp': '2025-09-30 22:34:44.858433', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:44.918735', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.08677985519170761, 'timestamp': '2025-09-30 22:34:44.923004', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:44.981288', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.09706255793571472, 'timestamp': '2025-09-30 22:34:44.984329', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:45.043857', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.19772663712501526, 'timestamp': '2025-09-30 22:34:45.047763', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:45.110871', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.07934003323316574, 'timestamp': '2025-09-30 22:34:45.116745', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:45.174311', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.07678859680891037, 'timestamp': '2025-09-30 22:34:45.177031', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:45.250067', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.05216342955827713, 'timestamp': '2025-09-30 22:34:45.254962', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:45.316085', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.12485641241073608, 'timestamp': '2025-09-30 22:34:45.320671', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:45.382159', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.06825833767652512, 'timestamp': '2025-09-30 22:34:45.391779', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:45.453134', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.07203438878059387, 'timestamp': '2025-09-30 22:34:45.457021', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:45.516842', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.1195872500538826, 'timestamp': '2025-09-30 22:34:45.519049', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:45.576354', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.11632946878671646, 'timestamp': '2025-09-30 22:34:45.580565', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:45.638393', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.09190121293067932, 'timestamp': '2025-09-30 22:34:45.646232', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:45.715963', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.08352357894182205, 'timestamp': '2025-09-30 22:34:45.718421', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:34:45.776338', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.08941339701414108, 'timestamp': '2025-09-30 22:34:45.779228', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:45.836187', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.1125297024846077, 'timestamp': '2025-09-30 22:34:45.839126', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:45.897985', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.1318451464176178, 'timestamp': '2025-09-30 22:34:45.904477', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:45.964561', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.14208990335464478, 'timestamp': '2025-09-30 22:34:45.967093', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:46.023698', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.1517498642206192, 'timestamp': '2025-09-30 22:34:46.028119', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:46.085455', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.0692889615893364, 'timestamp': '2025-09-30 22:34:46.088093', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:46.161587', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.05124124884605408, 'timestamp': '2025-09-30 22:34:46.167939', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:46.224317', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.03635203465819359, 'timestamp': '2025-09-30 22:34:46.228224', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:46.302374', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.11295460909605026, 'timestamp': '2025-09-30 22:34:46.304907', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:46.365995', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.07359980791807175, 'timestamp': '2025-09-30 22:34:46.369014', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:46.426677', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.10008642822504044, 'timestamp': '2025-09-30 22:34:46.432874', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:46.488919', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.07601714134216309, 'timestamp': '2025-09-30 22:34:46.495132', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:46.552731', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.06477564573287964, 'timestamp': '2025-09-30 22:34:46.557813', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:34:46.631333', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.1094779372215271, 'timestamp': '2025-09-30 22:34:46.634904', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:46.692526', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.14503872394561768, 'timestamp': '2025-09-30 22:34:46.699621', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:46.756185', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.14964139461517334, 'timestamp': '2025-09-30 22:34:46.759794', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:46.817812', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.07773407548666, 'timestamp': '2025-09-30 22:34:46.821557', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:46.878810', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.13314568996429443, 'timestamp': '2025-09-30 22:34:46.882455', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:46.940309', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.07798632979393005, 'timestamp': '2025-09-30 22:34:46.947809', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.020825', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.08585649728775024, 'timestamp': '2025-09-30 22:34:47.024024', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.084589', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.11123287677764893, 'timestamp': '2025-09-30 22:34:47.087884', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.148174', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.08596225827932358, 'timestamp': '2025-09-30 22:34:47.152162', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:47.210172', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.06296835839748383, 'timestamp': '2025-09-30 22:34:47.217379', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.289808', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.10930685698986053, 'timestamp': '2025-09-30 22:34:47.293178', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:34:47.355326', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.19637629389762878, 'timestamp': '2025-09-30 22:34:47.358831', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:47.415803', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.08012455701828003, 'timestamp': '2025-09-30 22:34:47.418679', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:47.476346', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.0679028257727623, 'timestamp': '2025-09-30 22:34:47.483033', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.539086', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.1571158617734909, 'timestamp': '2025-09-30 22:34:47.541457', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:47.612409', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.07177257537841797, 'timestamp': '2025-09-30 22:34:47.614731', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:47.695751', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.1705392599105835, 'timestamp': '2025-09-30 22:34:47.698021', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:47.758627', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.09519688785076141, 'timestamp': '2025-09-30 22:34:47.765293', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.821858', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.2778078317642212, 'timestamp': '2025-09-30 22:34:47.824590', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:47.881764', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.11742053925991058, 'timestamp': '2025-09-30 22:34:47.884141', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:47.942289', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.17295807600021362, 'timestamp': '2025-09-30 22:34:47.945465', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:48.007622', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.12288783490657806, 'timestamp': '2025-09-30 22:34:48.013549', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.068973', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.16016915440559387, 'timestamp': '2025-09-30 22:34:48.071540', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:48.128306', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.12954466044902802, 'timestamp': '2025-09-30 22:34:48.130636', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.199542', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.08572224527597427, 'timestamp': '2025-09-30 22:34:48.203274', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.259639', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.09871450811624527, 'timestamp': '2025-09-30 22:34:48.265985', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:48.345950', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.12426671385765076, 'timestamp': '2025-09-30 22:34:48.348220', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.407579', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.048294927924871445, 'timestamp': '2025-09-30 22:34:48.410537', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:48.477004', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.11773474514484406, 'timestamp': '2025-09-30 22:34:48.480221', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:48.538115', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.12515060603618622, 'timestamp': '2025-09-30 22:34:48.544640', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:48.602019', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.03393888846039772, 'timestamp': '2025-09-30 22:34:48.604087', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.661295', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.12233932316303253, 'timestamp': '2025-09-30 22:34:48.664825', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:48.725428', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.1494603306055069, 'timestamp': '2025-09-30 22:34:48.727969', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:48.784712', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.08345691859722137, 'timestamp': '2025-09-30 22:34:48.790616', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:48.859400', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.2473137527704239, 'timestamp': '2025-09-30 22:34:48.863253', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:48.922380', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.14847536385059357, 'timestamp': '2025-09-30 22:34:48.925235', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:34:49.005285', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.16720862686634064, 'timestamp': '2025-09-30 22:34:49.007632', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:49.073576', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.0847381055355072, 'timestamp': '2025-09-30 22:34:49.079541', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:49.147712', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.13904310762882233, 'timestamp': '2025-09-30 22:34:49.149955', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:49.206675', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.24793674051761627, 'timestamp': '2025-09-30 22:34:49.213562', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:49.273974', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.09754971414804459, 'timestamp': '2025-09-30 22:34:49.276744', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:49.334192', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.11868318170309067, 'timestamp': '2025-09-30 22:34:49.340318', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:49.395941', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.12046704441308975, 'timestamp': '2025-09-30 22:34:49.400470', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:49.459766', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.06719782203435898, 'timestamp': '2025-09-30 22:34:49.462055', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:34:49.521254', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.15640667080879211, 'timestamp': '2025-09-30 22:34:49.523633', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:49.580592', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.059161458164453506, 'timestamp': '2025-09-30 22:34:49.587991', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:34:49.644651', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.08066985756158829, 'timestamp': '2025-09-30 22:34:49.647402', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:34:49.704413', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.16512590646743774, 'timestamp': '2025-09-30 22:34:49.706660', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:35:04.614995', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 9629.805075667138, 'timestamp': '2025-09-30 22:35:04.619540', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:04.685120', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.16041216254234314, 'timestamp': '2025-09-30 22:35:04.687614', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:04.745335', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.07993291318416595, 'timestamp': '2025-09-30 22:35:04.756099', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:04.817558', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.06534570455551147, 'timestamp': '2025-09-30 22:35:04.821356', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:04.880603', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.14109326899051666, 'timestamp': '2025-09-30 22:35:04.887906', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:04.948396', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.14558199048042297, 'timestamp': '2025-09-30 22:35:04.958564', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.016157', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.1310199499130249, 'timestamp': '2025-09-30 22:35:05.023866', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:05.087004', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.1295430064201355, 'timestamp': '2025-09-30 22:35:05.090052', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:05.151983', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.08335774391889572, 'timestamp': '2025-09-30 22:35:05.158132', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:05.218171', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.2015080600976944, 'timestamp': '2025-09-30 22:35:05.226048', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.286015', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.0871623158454895, 'timestamp': '2025-09-30 22:35:05.294007', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:05.349807', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.09610078483819962, 'timestamp': '2025-09-30 22:35:05.352327', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:05.408795', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.13407951593399048, 'timestamp': '2025-09-30 22:35:05.414112', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:05.472021', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.10883095860481262, 'timestamp': '2025-09-30 22:35:05.474603', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.532077', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.19027897715568542, 'timestamp': '2025-09-30 22:35:05.540000', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.601658', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.043129101395606995, 'timestamp': '2025-09-30 22:35:05.604581', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:05.662741', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.11593341082334518, 'timestamp': '2025-09-30 22:35:05.665634', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:05.728727', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.2099035233259201, 'timestamp': '2025-09-30 22:35:05.731643', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:05.793482', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.12784841656684875, 'timestamp': '2025-09-30 22:35:05.800476', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.859971', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.11314886063337326, 'timestamp': '2025-09-30 22:35:05.862568', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.920717', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.09450249373912811, 'timestamp': '2025-09-30 22:35:05.923371', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:05.984219', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.11660949140787125, 'timestamp': '2025-09-30 22:35:05.988215', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.046604', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.07702011615037918, 'timestamp': '2025-09-30 22:35:06.054203', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.117321', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.1625262349843979, 'timestamp': '2025-09-30 22:35:06.124930', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.183068', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.11127570271492004, 'timestamp': '2025-09-30 22:35:06.191450', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:06.250159', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.08960162848234177, 'timestamp': '2025-09-30 22:35:06.252701', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:06.312190', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.10963286459445953, 'timestamp': '2025-09-30 22:35:06.322141', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.382132', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.19065812230110168, 'timestamp': '2025-09-30 22:35:06.385147', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:06.454110', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.17345258593559265, 'timestamp': '2025-09-30 22:35:06.457662', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.516774', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.08949907124042511, 'timestamp': '2025-09-30 22:35:06.520251', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:06.579193', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.1899128258228302, 'timestamp': '2025-09-30 22:35:06.589814', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:06.657271', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.18133020401000977, 'timestamp': '2025-09-30 22:35:06.664052', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:06.728940', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.17098723351955414, 'timestamp': '2025-09-30 22:35:06.732030', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:06.792396', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.14481371641159058, 'timestamp': '2025-09-30 22:35:06.795457', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:06.853298', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.07202358543872833, 'timestamp': '2025-09-30 22:35:06.859749', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.915733', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.13539689779281616, 'timestamp': '2025-09-30 22:35:06.923027', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:06.990252', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.09527736902236938, 'timestamp': '2025-09-30 22:35:06.993354', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:07.056290', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.09264180064201355, 'timestamp': '2025-09-30 22:35:07.059355', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:07.125668', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.0662764236330986, 'timestamp': '2025-09-30 22:35:07.131541', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:07.197218', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.152176633477211, 'timestamp': '2025-09-30 22:35:07.202748', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.260166', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.16638045012950897, 'timestamp': '2025-09-30 22:35:07.262656', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:07.322442', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.11876989901065826, 'timestamp': '2025-09-30 22:35:07.327526', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:07.385381', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.08292090892791748, 'timestamp': '2025-09-30 22:35:07.391787', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:07.448008', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.09041277319192886, 'timestamp': '2025-09-30 22:35:07.458042', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.527112', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.12789712846279144, 'timestamp': '2025-09-30 22:35:07.534800', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.598076', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.04023950174450874, 'timestamp': '2025-09-30 22:35:07.600804', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:07.664198', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.07064679265022278, 'timestamp': '2025-09-30 22:35:07.670953', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.727679', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.10851715505123138, 'timestamp': '2025-09-30 22:35:07.730735', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:07.787683', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.13711321353912354, 'timestamp': '2025-09-30 22:35:07.791156', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.848321', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.08675242960453033, 'timestamp': '2025-09-30 22:35:07.850433', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:07.910702', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.10003343224525452, 'timestamp': '2025-09-30 22:35:07.921876', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:07.980660', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.10648421943187714, 'timestamp': '2025-09-30 22:35:07.984658', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:08.042796', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.11687768250703812, 'timestamp': '2025-09-30 22:35:08.047187', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:08.113661', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.11306892335414886, 'timestamp': '2025-09-30 22:35:08.117622', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:08.175651', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.13977394998073578, 'timestamp': '2025-09-30 22:35:08.183091', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-09-30 22:35:08.777543', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:08.838265', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.09067153930664062, 'timestamp': '2025-09-30 22:35:08.841942', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:08.906801', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.18718165159225464, 'timestamp': '2025-09-30 22:35:08.915144', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:08.979768', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.10265939682722092, 'timestamp': '2025-09-30 22:35:08.983369', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:09.043108', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.09660220891237259, 'timestamp': '2025-09-30 22:35:09.050145', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:09.108326', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.08586021512746811, 'timestamp': '2025-09-30 22:35:09.112171', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.176537', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.07153323292732239, 'timestamp': '2025-09-30 22:35:09.184151', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.247795', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.1616954207420349, 'timestamp': '2025-09-30 22:35:09.257143', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.314211', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.13366566598415375, 'timestamp': '2025-09-30 22:35:09.329476', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.391400', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.08741828799247742, 'timestamp': '2025-09-30 22:35:09.395005', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.458879', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.10058844089508057, 'timestamp': '2025-09-30 22:35:09.463580', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.523268', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.17762066423892975, 'timestamp': '2025-09-30 22:35:09.527185', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:09.586799', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.18032047152519226, 'timestamp': '2025-09-30 22:35:09.594353', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.653130', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.16760976612567902, 'timestamp': '2025-09-30 22:35:09.656074', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:09.713533', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.1403084397315979, 'timestamp': '2025-09-30 22:35:09.715946', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:09.773988', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.14809560775756836, 'timestamp': '2025-09-30 22:35:09.783954', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:09.845149', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.055973488837480545, 'timestamp': '2025-09-30 22:35:09.851788', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:09.917822', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.16122674942016602, 'timestamp': '2025-09-30 22:35:09.920649', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:09.989277', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.1807786524295807, 'timestamp': '2025-09-30 22:35:09.992190', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:10.052840', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.22395603358745575, 'timestamp': '2025-09-30 22:35:10.056763', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:10.113695', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.2673684358596802, 'timestamp': '2025-09-30 22:35:10.119881', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:10.184641', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.12046711146831512, 'timestamp': '2025-09-30 22:35:10.187797', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:10.248324', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.19422823190689087, 'timestamp': '2025-09-30 22:35:10.255270', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:10.315233', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.17380796372890472, 'timestamp': '2025-09-30 22:35:10.322154', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:10.379856', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.1725841611623764, 'timestamp': '2025-09-30 22:35:10.385898', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:10.444693', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.11071418225765228, 'timestamp': '2025-09-30 22:35:10.454926', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:10.526195', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.15361528098583221, 'timestamp': '2025-09-30 22:35:10.535968', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:10.602496', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.16634084284305573, 'timestamp': '2025-09-30 22:35:10.606169', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:10.664071', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.06732764840126038, 'timestamp': '2025-09-30 22:35:10.672833', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:10.732890', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.10612432658672333, 'timestamp': '2025-09-30 22:35:10.738990', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:10.801024', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.08665731549263, 'timestamp': '2025-09-30 22:35:10.803366', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:10.864992', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.12675750255584717, 'timestamp': '2025-09-30 22:35:10.869858', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:10.933694', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.1084047332406044, 'timestamp': '2025-09-30 22:35:10.943951', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:11.004186', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.08664006739854813, 'timestamp': '2025-09-30 22:35:11.007370', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.069004', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.1237676814198494, 'timestamp': '2025-09-30 22:35:11.073636', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:11.134348', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.07399648427963257, 'timestamp': '2025-09-30 22:35:11.144339', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.210304', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.086687371134758, 'timestamp': '2025-09-30 22:35:11.224645', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:11.283199', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.06330916285514832, 'timestamp': '2025-09-30 22:35:11.286785', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:11.346104', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.16647979617118835, 'timestamp': '2025-09-30 22:35:11.352661', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.413212', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.12055595964193344, 'timestamp': '2025-09-30 22:35:11.417600', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:11.478571', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.06707991659641266, 'timestamp': '2025-09-30 22:35:11.487456', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.546109', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.12712326645851135, 'timestamp': '2025-09-30 22:35:11.550120', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.609475', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.13169260323047638, 'timestamp': '2025-09-30 22:35:11.613371', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:11.678170', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.08827072381973267, 'timestamp': '2025-09-30 22:35:11.680757', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.738412', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.13228490948677063, 'timestamp': '2025-09-30 22:35:11.745024', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:11.807822', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.1622752845287323, 'timestamp': '2025-09-30 22:35:11.815740', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.879612', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.12549062073230743, 'timestamp': '2025-09-30 22:35:11.882445', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:11.939855', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.1542283594608307, 'timestamp': '2025-09-30 22:35:11.943124', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.003752', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.11259309202432632, 'timestamp': '2025-09-30 22:35:12.010084', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.069948', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.07638891041278839, 'timestamp': '2025-09-30 22:35:12.074587', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:12.132108', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.0204829890280962, 'timestamp': '2025-09-30 22:35:12.135126', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.192974', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.10045723617076874, 'timestamp': '2025-09-30 22:35:12.195479', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:12.258735', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.07302435487508774, 'timestamp': '2025-09-30 22:35:12.266300', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:12.323306', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.09160841256380081, 'timestamp': '2025-09-30 22:35:12.327162', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:12.387360', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.08055581897497177, 'timestamp': '2025-09-30 22:35:12.391220', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.449405', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.18327613174915314, 'timestamp': '2025-09-30 22:35:12.456415', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:12.514477', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.10989084839820862, 'timestamp': '2025-09-30 22:35:12.526678', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.584011', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.11095127463340759, 'timestamp': '2025-09-30 22:35:12.591467', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:12.650779', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.13933740556240082, 'timestamp': '2025-09-30 22:35:12.653224', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:12.714168', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.08621126413345337, 'timestamp': '2025-09-30 22:35:12.716805', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:12.775084', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.13766252994537354, 'timestamp': '2025-09-30 22:35:12.783101', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.840376', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.14636766910552979, 'timestamp': '2025-09-30 22:35:12.846753', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:12.902616', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.18885783851146698, 'timestamp': '2025-09-30 22:35:12.916878', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:12.983485', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.1100977286696434, 'timestamp': '2025-09-30 22:35:12.986582', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:13.043683', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.18185289204120636, 'timestamp': '2025-09-30 22:35:13.050321', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:13.108199', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.0796646997332573, 'timestamp': '2025-09-30 22:35:13.110753', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:13.167230', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.1252862960100174, 'timestamp': '2025-09-30 22:35:13.170856', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:13.229019', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.06480501592159271, 'timestamp': '2025-09-30 22:35:13.231739', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:13.290239', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.10443540662527084, 'timestamp': '2025-09-30 22:35:13.298095', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:13.364940', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.07335954159498215, 'timestamp': '2025-09-30 22:35:13.372250', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:13.428938', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.10563413798809052, 'timestamp': '2025-09-30 22:35:13.431912', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:13.494843', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.11658988893032074, 'timestamp': '2025-09-30 22:35:13.498008', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:13.560541', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.17324596643447876, 'timestamp': '2025-09-30 22:35:13.566973', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:13.623398', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.11557355523109436, 'timestamp': '2025-09-30 22:35:13.626682', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:13.684610', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.07504121214151382, 'timestamp': '2025-09-30 22:35:13.687728', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:13.745101', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.12676183879375458, 'timestamp': '2025-09-30 22:35:13.754880', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:13.818143', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.06622643768787384, 'timestamp': '2025-09-30 22:35:13.825337', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:13.885125', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.14442695677280426, 'timestamp': '2025-09-30 22:35:13.893211', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:13.949944', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.2222747802734375, 'timestamp': '2025-09-30 22:35:13.953035', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:14.010768', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.05152366682887077, 'timestamp': '2025-09-30 22:35:14.025037', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:14.082727', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.10605693608522415, 'timestamp': '2025-09-30 22:35:14.089146', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:14.145586', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.1222652792930603, 'timestamp': '2025-09-30 22:35:14.148150', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:14.205460', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.06886130571365356, 'timestamp': '2025-09-30 22:35:14.209538', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.267657', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.07850738614797592, 'timestamp': '2025-09-30 22:35:14.272443', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.330232', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.10183844715356827, 'timestamp': '2025-09-30 22:35:14.337383', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.399068', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.07763148844242096, 'timestamp': '2025-09-30 22:35:14.403614', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:14.461947', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.07323667407035828, 'timestamp': '2025-09-30 22:35:14.471824', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.536667', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.0764404758810997, 'timestamp': '2025-09-30 22:35:14.541454', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:14.597067', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.1891757994890213, 'timestamp': '2025-09-30 22:35:14.603627', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:14.660702', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.11233817785978317, 'timestamp': '2025-09-30 22:35:14.663875', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.722118', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.17437979578971863, 'timestamp': '2025-09-30 22:35:14.734151', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.793138', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.06759810447692871, 'timestamp': '2025-09-30 22:35:14.796740', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:14.853480', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.10257095098495483, 'timestamp': '2025-09-30 22:35:14.861565', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:14.921672', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.1300424039363861, 'timestamp': '2025-09-30 22:35:14.925876', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:14.982505', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.05880584195256233, 'timestamp': '2025-09-30 22:35:14.986440', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:15.043685', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.11640918999910355, 'timestamp': '2025-09-30 22:35:15.046912', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:15.111844', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.12076827138662338, 'timestamp': '2025-09-30 22:35:15.119307', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:15.188954', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.06310828775167465, 'timestamp': '2025-09-30 22:35:15.192470', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:15.255039', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.06793056428432465, 'timestamp': '2025-09-30 22:35:15.258226', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:15.315720', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.052609629929065704, 'timestamp': '2025-09-30 22:35:15.324033', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:15.381778', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.07822495698928833, 'timestamp': '2025-09-30 22:35:15.388796', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:15.446594', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.07820987701416016, 'timestamp': '2025-09-30 22:35:15.449759', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:15.508810', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.07922173291444778, 'timestamp': '2025-09-30 22:35:15.512147', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:15.570204', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.1465189903974533, 'timestamp': '2025-09-30 22:35:15.573335', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:15.638965', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.1454678326845169, 'timestamp': '2025-09-30 22:35:15.646814', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:15.710429', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.04368086904287338, 'timestamp': '2025-09-30 22:35:15.713407', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:15.771740', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.13307498395442963, 'timestamp': '2025-09-30 22:35:15.782689', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:15.842318', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.05092443898320198, 'timestamp': '2025-09-30 22:35:15.845829', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:15.909032', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.10105336457490921, 'timestamp': '2025-09-30 22:35:15.916078', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:15.971874', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.14883476495742798, 'timestamp': '2025-09-30 22:35:15.976075', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:16.034531', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.09058956056833267, 'timestamp': '2025-09-30 22:35:16.038448', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:16.097664', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.08436092734336853, 'timestamp': '2025-09-30 22:35:16.107795', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:16.172903', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.15675801038742065, 'timestamp': '2025-09-30 22:35:16.181835', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:16.245948', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.07639846950769424, 'timestamp': '2025-09-30 22:35:16.250966', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:16.307929', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.10995203256607056, 'timestamp': '2025-09-30 22:35:16.319569', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:16.377243', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.0768711194396019, 'timestamp': '2025-09-30 22:35:16.387473', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:16.450047', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.08115249127149582, 'timestamp': '2025-09-30 22:35:16.458700', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:16.515082', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.08936412632465363, 'timestamp': '2025-09-30 22:35:16.519199', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:35:16.576057', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.12277664989233017, 'timestamp': '2025-09-30 22:35:16.579330', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:16.640006', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.17863008379936218, 'timestamp': '2025-09-30 22:35:16.643634', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:16.701840', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.06998368352651596, 'timestamp': '2025-09-30 22:35:16.708704', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:16.766062', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.21377132833003998, 'timestamp': '2025-09-30 22:35:16.772225', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:16.829292', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.12385590374469757, 'timestamp': '2025-09-30 22:35:16.832903', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:16.889680', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.13082894682884216, 'timestamp': '2025-09-30 22:35:16.896505', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:16.959103', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.11389393359422684, 'timestamp': '2025-09-30 22:35:16.967072', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.031769', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.10250761359930038, 'timestamp': '2025-09-30 22:35:17.036426', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.102109', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.08943954855203629, 'timestamp': '2025-09-30 22:35:17.107128', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.173243', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.2382974773645401, 'timestamp': '2025-09-30 22:35:17.177634', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:17.248316', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.061959438025951385, 'timestamp': '2025-09-30 22:35:17.257640', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:17.327780', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.15363548696041107, 'timestamp': '2025-09-30 22:35:17.334418', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:17.406137', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.10909528285264969, 'timestamp': '2025-09-30 22:35:17.410375', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:17.469620', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.1207083985209465, 'timestamp': '2025-09-30 22:35:17.474980', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:17.532809', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.1446657031774521, 'timestamp': '2025-09-30 22:35:17.541442', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.600477', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.11711005121469498, 'timestamp': '2025-09-30 22:35:17.607663', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:17.667890', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.11289586871862411, 'timestamp': '2025-09-30 22:35:17.683547', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.742024', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.09176541864871979, 'timestamp': '2025-09-30 22:35:17.746280', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:17.805306', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.10780353099107742, 'timestamp': '2025-09-30 22:35:17.813332', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:17.873556', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.15766429901123047, 'timestamp': '2025-09-30 22:35:17.877660', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:17.935721', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.07330745458602905, 'timestamp': '2025-09-30 22:35:17.941478', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:18.001665', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.0668833777308464, 'timestamp': '2025-09-30 22:35:18.007465', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.070081', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.1158808246254921, 'timestamp': '2025-09-30 22:35:18.079469', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:18.148316', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.19030648469924927, 'timestamp': '2025-09-30 22:35:18.151502', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:18.210062', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.11088649183511734, 'timestamp': '2025-09-30 22:35:18.213939', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:18.274265', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.0811895877122879, 'timestamp': '2025-09-30 22:35:18.278522', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:18.336109', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.1732117384672165, 'timestamp': '2025-09-30 22:35:18.342321', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.418646', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.15735942125320435, 'timestamp': '2025-09-30 22:35:18.423052', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.482561', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.08896289020776749, 'timestamp': '2025-09-30 22:35:18.486976', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.544099', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.15024490654468536, 'timestamp': '2025-09-30 22:35:18.548529', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.607781', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.10365118831396103, 'timestamp': '2025-09-30 22:35:18.614347', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:18.671557', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.16444185376167297, 'timestamp': '2025-09-30 22:35:18.674967', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:18.732717', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.08247974514961243, 'timestamp': '2025-09-30 22:35:18.743223', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.800534', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.15257959067821503, 'timestamp': '2025-09-30 22:35:18.811843', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.870282', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.1678551286458969, 'timestamp': '2025-09-30 22:35:18.878104', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:18.936394', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.1668701469898224, 'timestamp': '2025-09-30 22:35:18.940461', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:18.997953', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.1379031091928482, 'timestamp': '2025-09-30 22:35:19.004537', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:19.060976', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.14108487963676453, 'timestamp': '2025-09-30 22:35:19.064529', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:19.120506', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.1161448061466217, 'timestamp': '2025-09-30 22:35:19.128312', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:19.186704', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.15092776715755463, 'timestamp': '2025-09-30 22:35:19.190526', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:19.247218', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.05370251461863518, 'timestamp': '2025-09-30 22:35:19.251718', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:19.310694', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.08702380210161209, 'timestamp': '2025-09-30 22:35:19.316111', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:19.373341', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.08041064441204071, 'timestamp': '2025-09-30 22:35:19.381051', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:19.446350', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.06648613512516022, 'timestamp': '2025-09-30 22:35:19.455762', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:19.517600', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.11106357723474503, 'timestamp': '2025-09-30 22:35:19.530123', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:19.589811', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.2081901878118515, 'timestamp': '2025-09-30 22:35:19.594093', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:19.650943', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.14577099680900574, 'timestamp': '2025-09-30 22:35:19.662436', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:19.722424', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.12296322733163834, 'timestamp': '2025-09-30 22:35:19.727259', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:19.788596', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.09651388227939606, 'timestamp': '2025-09-30 22:35:19.791563', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:19.849756', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.18082089722156525, 'timestamp': '2025-09-30 22:35:19.854462', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:19.912499', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.11909253150224686, 'timestamp': '2025-09-30 22:35:19.919410', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:19.982683', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.09548670053482056, 'timestamp': '2025-09-30 22:35:19.987038', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:20.045379', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.11518580466508865, 'timestamp': '2025-09-30 22:35:20.049621', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.116105', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.08715473860502243, 'timestamp': '2025-09-30 22:35:20.120004', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:20.176352', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.08043263107538223, 'timestamp': '2025-09-30 22:35:20.183090', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.239104', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.09653642028570175, 'timestamp': '2025-09-30 22:35:20.249503', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:20.316223', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.1416715532541275, 'timestamp': '2025-09-30 22:35:20.319083', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.376952', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.09772321581840515, 'timestamp': '2025-09-30 22:35:20.379828', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.438015', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.09076182544231415, 'timestamp': '2025-09-30 22:35:20.446099', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:20.510547', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.1285010427236557, 'timestamp': '2025-09-30 22:35:20.513897', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.579575', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.058758415281772614, 'timestamp': '2025-09-30 22:35:20.583842', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:20.641733', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.06210717558860779, 'timestamp': '2025-09-30 22:35:20.646196', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:20.703622', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.11221113801002502, 'timestamp': '2025-09-30 22:35:20.710987', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:20.767762', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.11433328688144684, 'timestamp': '2025-09-30 22:35:20.771507', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:20.845663', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.09840422868728638, 'timestamp': '2025-09-30 22:35:20.853469', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:20.912260', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.1509883850812912, 'timestamp': '2025-09-30 22:35:20.916929', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:20.973912', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.125505268573761, 'timestamp': '2025-09-30 22:35:20.981227', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:21.039105', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.16152282059192657, 'timestamp': '2025-09-30 22:35:21.051524', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:21.109412', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.18517878651618958, 'timestamp': '2025-09-30 22:35:21.112996', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:21.174888', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.08176728338003159, 'timestamp': '2025-09-30 22:35:21.179187', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.237468', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.06725623458623886, 'timestamp': '2025-09-30 22:35:21.254600', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:21.312175', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.07622729241847992, 'timestamp': '2025-09-30 22:35:21.316305', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:21.373653', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.08852238953113556, 'timestamp': '2025-09-30 22:35:21.376408', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.436388', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.13513301312923431, 'timestamp': '2025-09-30 22:35:21.440815', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.497298', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.05528482422232628, 'timestamp': '2025-09-30 22:35:21.504905', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.562640', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.09498155117034912, 'timestamp': '2025-09-30 22:35:21.566785', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.623828', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.06874691694974899, 'timestamp': '2025-09-30 22:35:21.627909', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.685877', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.03533689305186272, 'timestamp': '2025-09-30 22:35:21.689226', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:21.746373', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.07764088362455368, 'timestamp': '2025-09-30 22:35:21.752922', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.817040', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.0950067788362503, 'timestamp': '2025-09-30 22:35:21.820634', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:21.878138', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.048442382365465164, 'timestamp': '2025-09-30 22:35:21.891385', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:21.950519', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.11226406693458557, 'timestamp': '2025-09-30 22:35:21.953630', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:22.010290', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.14720451831817627, 'timestamp': '2025-09-30 22:35:22.021926', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:22.078769', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.0989728718996048, 'timestamp': '2025-09-30 22:35:22.081445', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.137758', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.04903160408139229, 'timestamp': '2025-09-30 22:35:22.141046', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:22.199241', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.18118742108345032, 'timestamp': '2025-09-30 22:35:22.202225', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.259055', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.0794493556022644, 'timestamp': '2025-09-30 22:35:22.265085', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.320890', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.13756106793880463, 'timestamp': '2025-09-30 22:35:22.325016', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.382470', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.12528589367866516, 'timestamp': '2025-09-30 22:35:22.385864', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:22.442675', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.05405334755778313, 'timestamp': '2025-09-30 22:35:22.445942', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.503652', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.1562374383211136, 'timestamp': '2025-09-30 22:35:22.513080', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:22.584201', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.07919842004776001, 'timestamp': '2025-09-30 22:35:22.589185', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:22.646602', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.07504752278327942, 'timestamp': '2025-09-30 22:35:22.650212', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.710184', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.08417010307312012, 'timestamp': '2025-09-30 22:35:22.713278', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:22.773160', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.08649016916751862, 'timestamp': '2025-09-30 22:35:22.779735', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:22.837878', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.1305411159992218, 'timestamp': '2025-09-30 22:35:22.841444', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:22.898657', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.11092625558376312, 'timestamp': '2025-09-30 22:35:22.908063', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:22.964770', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.08702606707811356, 'timestamp': '2025-09-30 22:35:22.967765', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:23.027708', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.13375060260295868, 'timestamp': '2025-09-30 22:35:23.034008', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:23.090647', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.07040510326623917, 'timestamp': '2025-09-30 22:35:23.094587', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:23.153952', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.147139772772789, 'timestamp': '2025-09-30 22:35:23.162970', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:23.220389', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.1460571438074112, 'timestamp': '2025-09-30 22:35:23.230181', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.288521', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.184738427400589, 'timestamp': '2025-09-30 22:35:23.295795', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:23.351971', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.19000348448753357, 'timestamp': '2025-09-30 22:35:23.355142', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:23.410967', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.07364898175001144, 'timestamp': '2025-09-30 22:35:23.415850', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.475024', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.11355432122945786, 'timestamp': '2025-09-30 22:35:23.479394', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.545525', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.06341671943664551, 'timestamp': '2025-09-30 22:35:23.554465', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.623577', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.212117537856102, 'timestamp': '2025-09-30 22:35:23.629571', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:23.705882', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.11922086030244827, 'timestamp': '2025-09-30 22:35:23.724688', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.782516', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.03964487463235855, 'timestamp': '2025-09-30 22:35:23.789467', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.852777', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.1298397183418274, 'timestamp': '2025-09-30 22:35:23.859382', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:23.915036', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.14910602569580078, 'timestamp': '2025-09-30 22:35:23.922012', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:23.996194', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.1322159618139267, 'timestamp': '2025-09-30 22:35:24.003742', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:24.066949', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.1730155497789383, 'timestamp': '2025-09-30 22:35:24.070415', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:24.129511', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.12419658154249191, 'timestamp': '2025-09-30 22:35:24.135964', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:24.191526', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.14157286286354065, 'timestamp': '2025-09-30 22:35:24.194352', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:24.250522', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.10763995349407196, 'timestamp': '2025-09-30 22:35:24.253451', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:24.311051', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.06282184273004532, 'timestamp': '2025-09-30 22:35:24.318924', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:24.380943', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.17958824336528778, 'timestamp': '2025-09-30 22:35:24.389270', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:24.453357', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.08098051697015762, 'timestamp': '2025-09-30 22:35:24.457937', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:24.528460', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.1648303121328354, 'timestamp': '2025-09-30 22:35:24.532007', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:24.588288', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.07619119435548782, 'timestamp': '2025-09-30 22:35:24.591400', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:24.648506', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.09030105173587799, 'timestamp': '2025-09-30 22:35:24.655314', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:24.712098', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.22268570959568024, 'timestamp': '2025-09-30 22:35:24.715544', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:24.771978', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.0763009563088417, 'timestamp': '2025-09-30 22:35:24.774452', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:24.830552', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.13482171297073364, 'timestamp': '2025-09-30 22:35:24.833162', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:24.889820', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.01472310721874237, 'timestamp': '2025-09-30 22:35:24.900442', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:24.957035', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.039138611406087875, 'timestamp': '2025-09-30 22:35:24.964838', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:25.021119', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.11589912325143814, 'timestamp': '2025-09-30 22:35:25.024778', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:25.086004', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.0984562411904335, 'timestamp': '2025-09-30 22:35:25.089235', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.146236', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.1090494766831398, 'timestamp': '2025-09-30 22:35:25.152397', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:25.207919', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.059148676693439484, 'timestamp': '2025-09-30 22:35:25.211045', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.270617', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.12930946052074432, 'timestamp': '2025-09-30 22:35:25.273651', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:25.331818', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.08861049264669418, 'timestamp': '2025-09-30 22:35:25.336130', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.392964', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.11887460201978683, 'timestamp': '2025-09-30 22:35:25.406738', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.464767', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.11421789228916168, 'timestamp': '2025-09-30 22:35:25.467682', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.524113', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.09177501499652863, 'timestamp': '2025-09-30 22:35:25.527818', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.588981', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.09284888207912445, 'timestamp': '2025-09-30 22:35:25.596440', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.653061', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.08632620424032211, 'timestamp': '2025-09-30 22:35:25.659960', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:25.716438', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.11325503885746002, 'timestamp': '2025-09-30 22:35:25.719511', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.776576', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.14494435489177704, 'timestamp': '2025-09-30 22:35:25.788765', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:25.845788', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.04710957407951355, 'timestamp': '2025-09-30 22:35:25.848761', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:25.905181', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.07314648479223251, 'timestamp': '2025-09-30 22:35:25.913388', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:25.969768', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.11916307359933853, 'timestamp': '2025-09-30 22:35:25.973180', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:26.029947', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.11520593613386154, 'timestamp': '2025-09-30 22:35:26.032630', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:26.092177', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.10661822557449341, 'timestamp': '2025-09-30 22:35:26.094913', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:26.154623', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.07035870850086212, 'timestamp': '2025-09-30 22:35:26.161798', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:26.218343', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.14276234805583954, 'timestamp': '2025-09-30 22:35:26.221014', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:26.278110', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.15162862837314606, 'timestamp': '2025-09-30 22:35:26.280815', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:26.336482', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.0824456587433815, 'timestamp': '2025-09-30 22:35:26.339454', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:26.396102', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.13171768188476562, 'timestamp': '2025-09-30 22:35:26.403290', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:26.459113', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.12052834779024124, 'timestamp': '2025-09-30 22:35:26.461902', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:26.528254', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.14528991281986237, 'timestamp': '2025-09-30 22:35:26.531570', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:26.586941', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.10828126221895218, 'timestamp': '2025-09-30 22:35:26.590005', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:26.652872', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.14709658920764923, 'timestamp': '2025-09-30 22:35:26.666271', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:26.730770', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.15320609509944916, 'timestamp': '2025-09-30 22:35:26.733606', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:26.789542', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.15376992523670197, 'timestamp': '2025-09-30 22:35:26.792934', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:26.857624', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.1169363334774971, 'timestamp': '2025-09-30 22:35:26.860570', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:26.918287', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.08339797705411911, 'timestamp': '2025-09-30 22:35:26.924591', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:26.981269', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.15626153349876404, 'timestamp': '2025-09-30 22:35:26.984356', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:27.041721', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.15673542022705078, 'timestamp': '2025-09-30 22:35:27.044094', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.100579', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.08490072935819626, 'timestamp': '2025-09-30 22:35:27.103126', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.159468', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.15054713189601898, 'timestamp': '2025-09-30 22:35:27.166070', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.221570', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.19892117381095886, 'timestamp': '2025-09-30 22:35:27.224853', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:27.291519', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.14712966978549957, 'timestamp': '2025-09-30 22:35:27.294161', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.351319', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.10557740926742554, 'timestamp': '2025-09-30 22:35:27.354981', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:27.415795', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.051189932972192764, 'timestamp': '2025-09-30 22:35:27.426476', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:27.484738', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.09473172575235367, 'timestamp': '2025-09-30 22:35:27.487063', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:27.546179', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.12845125794410706, 'timestamp': '2025-09-30 22:35:27.549638', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:27.609460', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.0803750678896904, 'timestamp': '2025-09-30 22:35:27.612359', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.677569', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.10377220064401627, 'timestamp': '2025-09-30 22:35:27.685040', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:27.748327', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.1097477525472641, 'timestamp': '2025-09-30 22:35:27.751254', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:27.808371', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.17634043097496033, 'timestamp': '2025-09-30 22:35:27.811934', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:27.873788', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.0712384358048439, 'timestamp': '2025-09-30 22:35:27.883532', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:27.941604', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.12827053666114807, 'timestamp': '2025-09-30 22:35:27.947868', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.004427', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.10849235951900482, 'timestamp': '2025-09-30 22:35:28.007205', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.067068', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.06257277727127075, 'timestamp': '2025-09-30 22:35:28.074509', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.131899', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.07912526279687881, 'timestamp': '2025-09-30 22:35:28.134883', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.193934', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.15783032774925232, 'timestamp': '2025-09-30 22:35:28.200280', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.262238', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.17870643734931946, 'timestamp': '2025-09-30 22:35:28.268997', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.326238', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.07108556479215622, 'timestamp': '2025-09-30 22:35:28.329325', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:28.387233', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.15311387181282043, 'timestamp': '2025-09-30 22:35:28.390473', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.448790', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.11005119234323502, 'timestamp': '2025-09-30 22:35:28.455041', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:28.513138', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.14893019199371338, 'timestamp': '2025-09-30 22:35:28.515637', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.582327', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.1717238873243332, 'timestamp': '2025-09-30 22:35:28.585028', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:28.642973', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.062326133251190186, 'timestamp': '2025-09-30 22:35:28.646380', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:28.710425', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.22124500572681427, 'timestamp': '2025-09-30 22:35:28.717621', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.778452', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.09691227972507477, 'timestamp': '2025-09-30 22:35:28.782079', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.839223', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.15121309459209442, 'timestamp': '2025-09-30 22:35:28.848440', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.907243', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.09213711321353912, 'timestamp': '2025-09-30 22:35:28.910520', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:28.967960', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.09639154374599457, 'timestamp': '2025-09-30 22:35:28.974672', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.035523', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.02927895449101925, 'timestamp': '2025-09-30 22:35:29.047434', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:29.104858', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.09873922914266586, 'timestamp': '2025-09-30 22:35:29.110080', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:29.166688', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.08732689172029495, 'timestamp': '2025-09-30 22:35:29.169569', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.227373', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.13866791129112244, 'timestamp': '2025-09-30 22:35:29.234794', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:29.290871', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.11457455158233643, 'timestamp': '2025-09-30 22:35:29.294253', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.355281', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.10608450323343277, 'timestamp': '2025-09-30 22:35:29.358802', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.415869', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.1556263566017151, 'timestamp': '2025-09-30 22:35:29.418311', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:29.475050', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.0924501121044159, 'timestamp': '2025-09-30 22:35:29.481529', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:29.538503', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.18676908314228058, 'timestamp': '2025-09-30 22:35:29.542054', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:29.600117', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.05559481307864189, 'timestamp': '2025-09-30 22:35:29.602484', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.660068', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.09986603260040283, 'timestamp': '2025-09-30 22:35:29.662975', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:29.722346', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.10527995973825455, 'timestamp': '2025-09-30 22:35:29.729229', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:29.785712', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.14350701868534088, 'timestamp': '2025-09-30 22:35:29.788967', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.846595', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.11861206591129303, 'timestamp': '2025-09-30 22:35:29.849165', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:29.906265', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.04931341111660004, 'timestamp': '2025-09-30 22:35:29.908938', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:29.966855', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.1358165293931961, 'timestamp': '2025-09-30 22:35:29.974276', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:30.031524', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.1765129268169403, 'timestamp': '2025-09-30 22:35:30.034031', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:30.092536', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.055170390754938126, 'timestamp': '2025-09-30 22:35:30.095673', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:30.154655', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.06137728691101074, 'timestamp': '2025-09-30 22:35:30.157822', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:30.218101', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.1553095579147339, 'timestamp': '2025-09-30 22:35:30.224394', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.281503', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.1168212741613388, 'timestamp': '2025-09-30 22:35:30.284758', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.343060', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.1020755022764206, 'timestamp': '2025-09-30 22:35:30.361747', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:35:30.422879', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.10790050029754639, 'timestamp': '2025-09-30 22:35:30.427237', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.485236', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.14261046051979065, 'timestamp': '2025-09-30 22:35:30.491913', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.554380', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.1318492293357849, 'timestamp': '2025-09-30 22:35:30.557753', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:30.616069', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.1522386223077774, 'timestamp': '2025-09-30 22:35:30.627541', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.694862', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.11390242725610733, 'timestamp': '2025-09-30 22:35:30.697593', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:30.754133', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.17389905452728271, 'timestamp': '2025-09-30 22:35:30.760902', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:30.816927', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.16278913617134094, 'timestamp': '2025-09-30 22:35:30.819595', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:30.880427', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.11305436491966248, 'timestamp': '2025-09-30 22:35:30.885217', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:30.943695', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.0746719092130661, 'timestamp': '2025-09-30 22:35:30.947024', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.005040', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.13350555300712585, 'timestamp': '2025-09-30 22:35:31.011802', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.073277', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.21632546186447144, 'timestamp': '2025-09-30 22:35:31.077109', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:31.138519', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.12643031775951385, 'timestamp': '2025-09-30 22:35:31.142631', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.214245', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.12258931994438171, 'timestamp': '2025-09-30 22:35:31.225141', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.286589', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.09932893514633179, 'timestamp': '2025-09-30 22:35:31.293007', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.349729', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.16815073788166046, 'timestamp': '2025-09-30 22:35:31.353427', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.409644', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.0703367218375206, 'timestamp': '2025-09-30 22:35:31.413850', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.474299', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.05166114494204521, 'timestamp': '2025-09-30 22:35:31.477132', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.537763', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.09610989689826965, 'timestamp': '2025-09-30 22:35:31.545000', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.609437', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.16932527720928192, 'timestamp': '2025-09-30 22:35:31.611996', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.670314', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.07658476382493973, 'timestamp': '2025-09-30 22:35:31.674076', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.737400', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.16285507380962372, 'timestamp': '2025-09-30 22:35:31.743191', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:31.803875', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.1111280545592308, 'timestamp': '2025-09-30 22:35:31.813312', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:31.873878', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.15106183290481567, 'timestamp': '2025-09-30 22:35:31.877066', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:31.943110', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.13142764568328857, 'timestamp': '2025-09-30 22:35:31.948832', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:32.010777', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.0811162069439888, 'timestamp': '2025-09-30 22:35:32.013741', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.081836', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.09456492215394974, 'timestamp': '2025-09-30 22:35:32.090225', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:32.152944', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.10996587574481964, 'timestamp': '2025-09-30 22:35:32.155774', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:32.220158', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.06520356982946396, 'timestamp': '2025-09-30 22:35:32.227917', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.290883', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.09375131130218506, 'timestamp': '2025-09-30 22:35:32.295751', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.357684', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.09688303619623184, 'timestamp': '2025-09-30 22:35:32.364880', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:32.425566', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.16763855516910553, 'timestamp': '2025-09-30 22:35:32.429863', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.487132', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.12554319202899933, 'timestamp': '2025-09-30 22:35:32.490019', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:32.547748', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.1383204460144043, 'timestamp': '2025-09-30 22:35:32.552642', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.610707', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.130422443151474, 'timestamp': '2025-09-30 22:35:32.617650', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:32.673844', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.0957556962966919, 'timestamp': '2025-09-30 22:35:32.678019', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:32.735197', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.11290252953767776, 'timestamp': '2025-09-30 22:35:32.748014', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.805899', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.12133869528770447, 'timestamp': '2025-09-30 22:35:32.809388', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.867495', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.2755160331726074, 'timestamp': '2025-09-30 22:35:32.874395', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:32.931285', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.19637224078178406, 'timestamp': '2025-09-30 22:35:32.937071', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:32.997675', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.08354607969522476, 'timestamp': '2025-09-30 22:35:33.001771', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.060651', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.0536850206553936, 'timestamp': '2025-09-30 22:35:33.063374', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:33.126050', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.08535625040531158, 'timestamp': '2025-09-30 22:35:33.141303', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:33.203526', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.13708186149597168, 'timestamp': '2025-09-30 22:35:33.220381', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:33.305652', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.042844563722610474, 'timestamp': '2025-09-30 22:35:33.311498', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.369521', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.05084474757313728, 'timestamp': '2025-09-30 22:35:33.380926', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:33.440184', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.07311241328716278, 'timestamp': '2025-09-30 22:35:33.447339', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:33.505619', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.1666681468486786, 'timestamp': '2025-09-30 22:35:33.509040', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:33.568551', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.07007952034473419, 'timestamp': '2025-09-30 22:35:33.578371', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.636203', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.11274661868810654, 'timestamp': '2025-09-30 22:35:33.648386', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:33.715330', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.11197004467248917, 'timestamp': '2025-09-30 22:35:33.722316', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:33.785544', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.032737407833337784, 'timestamp': '2025-09-30 22:35:33.790688', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.853285', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.12217587977647781, 'timestamp': '2025-09-30 22:35:33.863292', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.922115', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.10840330272912979, 'timestamp': '2025-09-30 22:35:33.926348', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:33.985065', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.08466748893260956, 'timestamp': '2025-09-30 22:35:33.993048', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.049798', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.17157666385173798, 'timestamp': '2025-09-30 22:35:34.052843', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.114240', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.09265122562646866, 'timestamp': '2025-09-30 22:35:34.117000', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:34.174974', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.0901075080037117, 'timestamp': '2025-09-30 22:35:34.178456', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:34.235177', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.061699818819761276, 'timestamp': '2025-09-30 22:35:34.243114', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:34.302070', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.1455172747373581, 'timestamp': '2025-09-30 22:35:34.304910', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.368791', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.09201806783676147, 'timestamp': '2025-09-30 22:35:34.372900', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:34.439072', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.15314626693725586, 'timestamp': '2025-09-30 22:35:34.444366', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:34.502647', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.15906460583209991, 'timestamp': '2025-09-30 22:35:34.509374', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:34.571690', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.0781593844294548, 'timestamp': '2025-09-30 22:35:34.580822', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:34.638130', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.11456749588251114, 'timestamp': '2025-09-30 22:35:34.641438', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.705342', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.12561985850334167, 'timestamp': '2025-09-30 22:35:34.712139', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.769517', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.07096128165721893, 'timestamp': '2025-09-30 22:35:34.776891', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:34.840305', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.1332995891571045, 'timestamp': '2025-09-30 22:35:34.843038', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:34.903286', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.044090062379837036, 'timestamp': '2025-09-30 22:35:34.907770', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:34.969417', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.09434079378843307, 'timestamp': '2025-09-30 22:35:34.972232', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:35.030558', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.159918874502182, 'timestamp': '2025-09-30 22:35:35.036587', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:35.092808', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.10542583465576172, 'timestamp': '2025-09-30 22:35:35.101592', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:35.158953', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.10386290401220322, 'timestamp': '2025-09-30 22:35:35.161639', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:35:35.219106', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.22120694816112518, 'timestamp': '2025-09-30 22:35:35.222416', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:35.281042', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.04711262509226799, 'timestamp': '2025-09-30 22:35:35.287195', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.348725', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.10612175613641739, 'timestamp': '2025-09-30 22:35:35.351505', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.413984', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.09567819535732269, 'timestamp': '2025-09-30 22:35:35.416768', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.479587', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.17356081306934357, 'timestamp': '2025-09-30 22:35:35.489006', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.549437', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.16535469889640808, 'timestamp': '2025-09-30 22:35:35.555361', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:35.626094', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.11310787498950958, 'timestamp': '2025-09-30 22:35:35.629780', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.686845', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.09418634325265884, 'timestamp': '2025-09-30 22:35:35.690192', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:35.747040', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.12573519349098206, 'timestamp': '2025-09-30 22:35:35.749641', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:35.813429', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.0421173982322216, 'timestamp': '2025-09-30 22:35:35.821616', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:35:49.868834', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 8705.877378849334, 'timestamp': '2025-09-30 22:35:49.877412', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:49.934455', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.09945141524076462, 'timestamp': '2025-09-30 22:35:49.938354', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:49.998953', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.044739555567502975, 'timestamp': '2025-09-30 22:35:50.002663', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:50.060335', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.3355751633644104, 'timestamp': '2025-09-30 22:35:50.070876', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:50.127399', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.11902764439582825, 'timestamp': '2025-09-30 22:35:50.135714', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.198277', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.12434817850589752, 'timestamp': '2025-09-30 22:35:50.201434', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.261562', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.07735475152730942, 'timestamp': '2025-09-30 22:35:50.263980', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:50.322252', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.048036254942417145, 'timestamp': '2025-09-30 22:35:50.326003', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-30 22:35:50.382544', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.21390536427497864, 'timestamp': '2025-09-30 22:35:50.389828', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.447701', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.05846158787608147, 'timestamp': '2025-09-30 22:35:50.450625', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.512943', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.10413496941328049, 'timestamp': '2025-09-30 22:35:50.517499', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:50.576341', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.08800334483385086, 'timestamp': '2025-09-30 22:35:50.579873', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:50.637098', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.0834694504737854, 'timestamp': '2025-09-30 22:35:50.644474', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:50.704048', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.16704069077968597, 'timestamp': '2025-09-30 22:35:50.707809', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.767689', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.059293828904628754, 'timestamp': '2025-09-30 22:35:50.772112', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.837985', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.13959839940071106, 'timestamp': '2025-09-30 22:35:50.842219', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.900776', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.036399148404598236, 'timestamp': '2025-09-30 22:35:50.907633', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:50.966828', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.08202560991048813, 'timestamp': '2025-09-30 22:35:50.970744', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.037473', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.09628646075725555, 'timestamp': '2025-09-30 22:35:51.040769', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.101150', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.056283775717020035, 'timestamp': '2025-09-30 22:35:51.104291', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:51.172682', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.12867817282676697, 'timestamp': '2025-09-30 22:35:51.179699', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:51.243810', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.09671514481306076, 'timestamp': '2025-09-30 22:35:51.248141', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:51.319098', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.08900844305753708, 'timestamp': '2025-09-30 22:35:51.324679', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:51.383085', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.10808206349611282, 'timestamp': '2025-09-30 22:35:51.386751', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:51.450969', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.16149376332759857, 'timestamp': '2025-09-30 22:35:51.460578', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.518863', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.047397010028362274, 'timestamp': '2025-09-30 22:35:51.523247', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.582236', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.1040160208940506, 'timestamp': '2025-09-30 22:35:51.590070', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:51.648446', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.19192543625831604, 'timestamp': '2025-09-30 22:35:51.653016', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.710452', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.056405168026685715, 'timestamp': '2025-09-30 22:35:51.719497', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.775662', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.07286269217729568, 'timestamp': '2025-09-30 22:35:51.778573', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:51.842990', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.11361425369977951, 'timestamp': '2025-09-30 22:35:51.846852', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:51.906821', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.05027639865875244, 'timestamp': '2025-09-30 22:35:51.911331', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:51.970343', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.1349610835313797, 'timestamp': '2025-09-30 22:35:51.977885', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.034645', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.04220191389322281, 'timestamp': '2025-09-30 22:35:52.038315', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:52.107358', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.12460138648748398, 'timestamp': '2025-09-30 22:35:52.111760', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.171226', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.1751580387353897, 'timestamp': '2025-09-30 22:35:52.174529', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:52.232368', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.08366017788648605, 'timestamp': '2025-09-30 22:35:52.247808', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:52.305529', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.10491897910833359, 'timestamp': '2025-09-30 22:35:52.309534', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:52.367331', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.04243159294128418, 'timestamp': '2025-09-30 22:35:52.370463', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.435239', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.12094035744667053, 'timestamp': '2025-09-30 22:35:52.438860', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:52.504566', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.06323325634002686, 'timestamp': '2025-09-30 22:35:52.511448', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.569589', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.15702728927135468, 'timestamp': '2025-09-30 22:35:52.572687', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.638025', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.09980972111225128, 'timestamp': '2025-09-30 22:35:52.641871', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:52.700347', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.1484556347131729, 'timestamp': '2025-09-30 22:35:52.704229', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:52.773405', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.04894854500889778, 'timestamp': '2025-09-30 22:35:52.781023', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:52.838783', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.07153593748807907, 'timestamp': '2025-09-30 22:35:52.841875', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:52.911606', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.0809258446097374, 'timestamp': '2025-09-30 22:35:52.914497', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:52.973478', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.16871418058872223, 'timestamp': '2025-09-30 22:35:52.978209', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.039821', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.08813873678445816, 'timestamp': '2025-09-30 22:35:53.045920', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:53.110600', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.0633549913764, 'timestamp': '2025-09-30 22:35:53.114257', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.173150', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.11539271473884583, 'timestamp': '2025-09-30 22:35:53.183614', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.251305', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.12463999539613724, 'timestamp': '2025-09-30 22:35:53.255400', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.318364', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.12126551568508148, 'timestamp': '2025-09-30 22:35:53.326672', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.386828', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.16279254853725433, 'timestamp': '2025-09-30 22:35:53.392054', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:53.449295', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.07609070092439651, 'timestamp': '2025-09-30 22:35:53.452575', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:53.512137', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.11496610194444656, 'timestamp': '2025-09-30 22:35:53.515045', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.585563', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.17023997008800507, 'timestamp': '2025-09-30 22:35:53.592328', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:53.648954', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.1280641108751297, 'timestamp': '2025-09-30 22:35:53.658795', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.726455', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.060616474598646164, 'timestamp': '2025-09-30 22:35:53.729646', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:53.789154', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.1122741550207138, 'timestamp': '2025-09-30 22:35:53.793234', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.862279', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.17637689411640167, 'timestamp': '2025-09-30 22:35:53.869349', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:53.930808', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.08804041147232056, 'timestamp': '2025-09-30 22:35:53.948629', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:54.010816', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.07891864329576492, 'timestamp': '2025-09-30 22:35:54.014283', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:54.079184', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.12272880971431732, 'timestamp': '2025-09-30 22:35:54.082523', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:54.139659', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.08337600529193878, 'timestamp': '2025-09-30 22:35:54.149785', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:54.208542', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.09229771047830582, 'timestamp': '2025-09-30 22:35:54.211903', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:54.271094', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.11422613263130188, 'timestamp': '2025-09-30 22:35:54.277858', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:54.336865', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.1176045686006546, 'timestamp': '2025-09-30 22:35:54.340423', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:54.401227', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.125566303730011, 'timestamp': '2025-09-30 22:35:54.407322', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:54.463375', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.17289170622825623, 'timestamp': '2025-09-30 22:35:54.466387', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:54.542803', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.0980992317199707, 'timestamp': '2025-09-30 22:35:54.545880', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:54.607274', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.2191745489835739, 'timestamp': '2025-09-30 22:35:54.611097', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:54.671317', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.09414996951818466, 'timestamp': '2025-09-30 22:35:54.678210', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:54.734767', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.046614620834589005, 'timestamp': '2025-09-30 22:35:54.737483', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:54.794259', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.1501125544309616, 'timestamp': '2025-09-30 22:35:54.797044', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:54.856966', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.05850644037127495, 'timestamp': '2025-09-30 22:35:54.859636', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:54.916810', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.12472482025623322, 'timestamp': '2025-09-30 22:35:54.923463', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:54.979284', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.1513342410326004, 'timestamp': '2025-09-30 22:35:54.981617', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.038639', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.1544291377067566, 'timestamp': '2025-09-30 22:35:55.041509', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.103758', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.09672436863183975, 'timestamp': '2025-09-30 22:35:55.106963', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.164277', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.07941713184118271, 'timestamp': '2025-09-30 22:35:55.178193', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:55.235855', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.15504871308803558, 'timestamp': '2025-09-30 22:35:55.239837', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.297376', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.05293785035610199, 'timestamp': '2025-09-30 22:35:55.300176', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:55.360563', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.1431187242269516, 'timestamp': '2025-09-30 22:35:55.363886', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:55.421599', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.12244158238172531, 'timestamp': '2025-09-30 22:35:55.427822', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:55.484123', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.0696648433804512, 'timestamp': '2025-09-30 22:35:55.486745', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.551828', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.07380343228578568, 'timestamp': '2025-09-30 22:35:55.554388', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.612094', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.098829485476017, 'timestamp': '2025-09-30 22:35:55.618654', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:55.676585', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.1117246150970459, 'timestamp': '2025-09-30 22:35:55.683565', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-09-30 22:35:56.125839', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:56.185960', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.13423879444599152, 'timestamp': '2025-09-30 22:35:56.194729', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:56.261502', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.11353226006031036, 'timestamp': '2025-09-30 22:35:56.264296', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:56.322195', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.09294672310352325, 'timestamp': '2025-09-30 22:35:56.326333', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.384251', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.08506571501493454, 'timestamp': '2025-09-30 22:35:56.391997', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:56.450760', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.06146180257201195, 'timestamp': '2025-09-30 22:35:56.462025', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:56.528136', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.08080118149518967, 'timestamp': '2025-09-30 22:35:56.539806', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.607377', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.1350078284740448, 'timestamp': '2025-09-30 22:35:56.620260', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:56.679853', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.12405498325824738, 'timestamp': '2025-09-30 22:35:56.686807', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.743538', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.0748002678155899, 'timestamp': '2025-09-30 22:35:56.748762', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.807855', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.18431329727172852, 'timestamp': '2025-09-30 22:35:56.810767', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.868207', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.08018466830253601, 'timestamp': '2025-09-30 22:35:56.871391', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.928387', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.11176923662424088, 'timestamp': '2025-09-30 22:35:56.936067', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:56.998442', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.11036065220832825, 'timestamp': '2025-09-30 22:35:57.001910', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:57.060546', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.08538907021284103, 'timestamp': '2025-09-30 22:35:57.070220', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:57.131950', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.057862475514411926, 'timestamp': '2025-09-30 22:35:57.135669', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:57.191847', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.13191023468971252, 'timestamp': '2025-09-30 22:35:57.198465', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.255909', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.10803274810314178, 'timestamp': '2025-09-30 22:35:57.259612', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.316791', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.11594859510660172, 'timestamp': '2025-09-30 22:35:57.320569', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:57.379522', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.08834195137023926, 'timestamp': '2025-09-30 22:35:57.382988', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.449644', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.1207539513707161, 'timestamp': '2025-09-30 22:35:57.455978', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:57.513194', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.1623305082321167, 'timestamp': '2025-09-30 22:35:57.539143', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:57.599110', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.06059355288743973, 'timestamp': '2025-09-30 22:35:57.603325', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:57.662435', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.13369128108024597, 'timestamp': '2025-09-30 22:35:57.666751', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.725444', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.15451037883758545, 'timestamp': '2025-09-30 22:35:57.732458', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.800767', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.11532555520534515, 'timestamp': '2025-09-30 22:35:57.803798', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:57.862558', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.09075374901294708, 'timestamp': '2025-09-30 22:35:57.865469', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:57.923455', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.11515481024980545, 'timestamp': '2025-09-30 22:35:57.926187', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:57.984164', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.15392890572547913, 'timestamp': '2025-09-30 22:35:57.997992', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:58.069845', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.0809750035405159, 'timestamp': '2025-09-30 22:35:58.074296', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:58.131425', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.17934468388557434, 'timestamp': '2025-09-30 22:35:58.134333', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:58.194681', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.08249516785144806, 'timestamp': '2025-09-30 22:35:58.198449', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:58.259852', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.0964125245809555, 'timestamp': '2025-09-30 22:35:58.271376', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:58.331953', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.11628933995962143, 'timestamp': '2025-09-30 22:35:58.337967', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:58.395590', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.21051055192947388, 'timestamp': '2025-09-30 22:35:58.398233', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:58.462524', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.046974122524261475, 'timestamp': '2025-09-30 22:35:58.477099', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:58.537506', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.1840680092573166, 'timestamp': '2025-09-30 22:35:58.544679', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:58.608810', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.12459829449653625, 'timestamp': '2025-09-30 22:35:58.612436', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:58.668238', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.13935792446136475, 'timestamp': '2025-09-30 22:35:58.675790', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:35:58.734428', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.06187278777360916, 'timestamp': '2025-09-30 22:35:58.741754', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:58.808275', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.20308028161525726, 'timestamp': '2025-09-30 22:35:58.820500', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:58.883769', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.16227051615715027, 'timestamp': '2025-09-30 22:35:58.893343', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:35:58.955127', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.105650395154953, 'timestamp': '2025-09-30 22:35:58.965402', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:35:59.027719', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.19835679233074188, 'timestamp': '2025-09-30 22:35:59.039469', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:59.098302', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.11711559444665909, 'timestamp': '2025-09-30 22:35:59.105557', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.164184', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.03841501474380493, 'timestamp': '2025-09-30 22:35:59.166903', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.228770', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.06381296366453171, 'timestamp': '2025-09-30 22:35:59.233019', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.293521', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.14416100084781647, 'timestamp': '2025-09-30 22:35:59.296327', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:59.360504', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.22481732070446014, 'timestamp': '2025-09-30 22:35:59.371476', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:59.434873', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.15661680698394775, 'timestamp': '2025-09-30 22:35:59.438569', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:59.496197', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.11766748875379562, 'timestamp': '2025-09-30 22:35:59.499823', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.561971', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.08516712486743927, 'timestamp': '2025-09-30 22:35:59.566633', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.623665', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.10923879593610764, 'timestamp': '2025-09-30 22:35:59.631381', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.687868', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.1682724505662918, 'timestamp': '2025-09-30 22:35:59.692219', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:59.751465', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.1634272038936615, 'timestamp': '2025-09-30 22:35:59.754278', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:35:59.812209', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.07859840244054794, 'timestamp': '2025-09-30 22:35:59.816751', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:35:59.876322', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.09810415655374527, 'timestamp': '2025-09-30 22:35:59.882210', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:35:59.940748', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.07773363590240479, 'timestamp': '2025-09-30 22:35:59.943109', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.000130', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.1208445280790329, 'timestamp': '2025-09-30 22:36:00.003528', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.060848', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.12597660720348358, 'timestamp': '2025-09-30 22:36:00.064317', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:00.122442', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.13549330830574036, 'timestamp': '2025-09-30 22:36:00.132150', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.196039', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.1643536388874054, 'timestamp': '2025-09-30 22:36:00.200878', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:00.264946', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.12981177866458893, 'timestamp': '2025-09-30 22:36:00.269254', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.327596', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.16158179938793182, 'timestamp': '2025-09-30 22:36:00.331498', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:00.390885', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.12792560458183289, 'timestamp': '2025-09-30 22:36:00.397234', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.454755', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.11779947578907013, 'timestamp': '2025-09-30 22:36:00.459676', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:00.518850', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.17501619458198547, 'timestamp': '2025-09-30 22:36:00.527455', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:00.588463', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.12767302989959717, 'timestamp': '2025-09-30 22:36:00.598336', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:00.657103', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.1515464037656784, 'timestamp': '2025-09-30 22:36:00.663239', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:00.721813', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.07391025871038437, 'timestamp': '2025-09-30 22:36:00.724662', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:00.785778', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.09337623417377472, 'timestamp': '2025-09-30 22:36:00.788460', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:00.844986', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.09537819027900696, 'timestamp': '2025-09-30 22:36:00.847334', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:00.905228', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.07859213650226593, 'timestamp': '2025-09-30 22:36:00.911403', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:00.969279', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.08236543834209442, 'timestamp': '2025-09-30 22:36:00.977091', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:01.037234', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.19038930535316467, 'timestamp': '2025-09-30 22:36:01.039739', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:01.096479', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.2467915266752243, 'timestamp': '2025-09-30 22:36:01.099328', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:01.161234', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.13360993564128876, 'timestamp': '2025-09-30 22:36:01.168279', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:01.224109', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.05857502669095993, 'timestamp': '2025-09-30 22:36:01.232736', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:01.289397', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.15345847606658936, 'timestamp': '2025-09-30 22:36:01.296338', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:01.356071', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.09661206603050232, 'timestamp': '2025-09-30 22:36:01.358820', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:01.420079', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.09953441470861435, 'timestamp': '2025-09-30 22:36:01.426195', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:01.482347', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.1619897484779358, 'timestamp': '2025-09-30 22:36:01.484927', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:01.546093', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.15109674632549286, 'timestamp': '2025-09-30 22:36:01.548875', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:01.606404', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.08725517988204956, 'timestamp': '2025-09-30 22:36:01.609730', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:01.667720', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.0948481634259224, 'timestamp': '2025-09-30 22:36:01.674140', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:01.744105', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.13838636875152588, 'timestamp': '2025-09-30 22:36:01.753074', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:01.810459', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.09684751182794571, 'timestamp': '2025-09-30 22:36:01.815301', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:01.876832', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.07760093361139297, 'timestamp': '2025-09-30 22:36:01.884806', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:01.945054', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.09891849011182785, 'timestamp': '2025-09-30 22:36:01.952050', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.008644', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.06591048091650009, 'timestamp': '2025-09-30 22:36:02.014770', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:02.083138', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.0974031314253807, 'timestamp': '2025-09-30 22:36:02.085849', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.144244', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.07737693190574646, 'timestamp': '2025-09-30 22:36:02.147400', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.205181', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.17443817853927612, 'timestamp': '2025-09-30 22:36:02.212590', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.270941', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.20782072842121124, 'timestamp': '2025-09-30 22:36:02.275492', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.334108', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.12721222639083862, 'timestamp': '2025-09-30 22:36:02.337753', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.395898', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.15065410733222961, 'timestamp': '2025-09-30 22:36:02.398849', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.456984', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.1888684183359146, 'timestamp': '2025-09-30 22:36:02.463386', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:02.520940', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.13106685876846313, 'timestamp': '2025-09-30 22:36:02.523512', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.580301', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.043522924184799194, 'timestamp': '2025-09-30 22:36:02.583232', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.641391', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.18967504799365997, 'timestamp': '2025-09-30 22:36:02.643748', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:02.703006', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.08004329353570938, 'timestamp': '2025-09-30 22:36:02.709560', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:02.768096', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.15103043615818024, 'timestamp': '2025-09-30 22:36:02.774841', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.836430', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.058963075280189514, 'timestamp': '2025-09-30 22:36:02.843113', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.901852', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.15529541671276093, 'timestamp': '2025-09-30 22:36:02.907663', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:02.974246', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.08617160469293594, 'timestamp': '2025-09-30 22:36:02.984751', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:03.043243', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.11045075207948685, 'timestamp': '2025-09-30 22:36:03.045484', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:03.103548', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.05960674211382866, 'timestamp': '2025-09-30 22:36:03.110777', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:03.169193', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.10419218987226486, 'timestamp': '2025-09-30 22:36:03.172622', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:03.231782', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.12029477953910828, 'timestamp': '2025-09-30 22:36:03.243615', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:03.301073', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.1300009936094284, 'timestamp': '2025-09-30 22:36:03.303628', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:03.366281', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.11217042803764343, 'timestamp': '2025-09-30 22:36:03.373433', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:03.432059', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.06040675938129425, 'timestamp': '2025-09-30 22:36:03.435155', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:03.500882', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.1274203658103943, 'timestamp': '2025-09-30 22:36:03.506603', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:03.564889', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.1197192519903183, 'timestamp': '2025-09-30 22:36:03.567212', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:03.624699', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.1414647251367569, 'timestamp': '2025-09-30 22:36:03.628164', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:03.688275', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.0981680154800415, 'timestamp': '2025-09-30 22:36:03.692636', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:03.750117', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.08683642745018005, 'timestamp': '2025-09-30 22:36:03.757155', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:03.816211', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.10938746482133865, 'timestamp': '2025-09-30 22:36:03.819845', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:03.879308', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.13434986770153046, 'timestamp': '2025-09-30 22:36:03.889485', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:03.946884', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.06528185307979584, 'timestamp': '2025-09-30 22:36:03.950138', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:04.008185', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.10561810433864594, 'timestamp': '2025-09-30 22:36:04.024856', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:04.081050', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.17729057371616364, 'timestamp': '2025-09-30 22:36:04.084198', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:04.147654', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.05667375773191452, 'timestamp': '2025-09-30 22:36:04.151906', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:04.211472', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.10251667350530624, 'timestamp': '2025-09-30 22:36:04.214352', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:04.272883', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.11450658738613129, 'timestamp': '2025-09-30 22:36:04.280042', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.338730', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.145753413438797, 'timestamp': '2025-09-30 22:36:04.342661', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.405456', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.09228453040122986, 'timestamp': '2025-09-30 22:36:04.410911', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:04.472200', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.13864031434059143, 'timestamp': '2025-09-30 22:36:04.474421', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.531807', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.13248704373836517, 'timestamp': '2025-09-30 22:36:04.537676', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.595517', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.11221291124820709, 'timestamp': '2025-09-30 22:36:04.598035', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.655154', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.2522677779197693, 'timestamp': '2025-09-30 22:36:04.657643', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:04.715693', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.09520721435546875, 'timestamp': '2025-09-30 22:36:04.718257', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:04.776418', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.12999816238880157, 'timestamp': '2025-09-30 22:36:04.782639', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:04.843862', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.10765254497528076, 'timestamp': '2025-09-30 22:36:04.847542', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:04.907473', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.204292893409729, 'timestamp': '2025-09-30 22:36:04.912018', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:04.969887', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.09873295575380325, 'timestamp': '2025-09-30 22:36:04.972372', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.029979', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.15991759300231934, 'timestamp': '2025-09-30 22:36:05.037042', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.093706', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.0864594504237175, 'timestamp': '2025-09-30 22:36:05.096456', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.154822', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.11303293704986572, 'timestamp': '2025-09-30 22:36:05.157272', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.216832', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.16085587441921234, 'timestamp': '2025-09-30 22:36:05.219530', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.277918', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.11994942277669907, 'timestamp': '2025-09-30 22:36:05.289979', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.347888', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.15122756361961365, 'timestamp': '2025-09-30 22:36:05.351493', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.413986', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.07363196462392807, 'timestamp': '2025-09-30 22:36:05.417169', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.475199', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.15279774367809296, 'timestamp': '2025-09-30 22:36:05.480707', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.537123', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.05586498975753784, 'timestamp': '2025-09-30 22:36:05.543512', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.600353', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.07896246016025543, 'timestamp': '2025-09-30 22:36:05.603139', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.663771', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.1217295378446579, 'timestamp': '2025-09-30 22:36:05.666512', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:05.732318', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.16527749598026276, 'timestamp': '2025-09-30 22:36:05.735088', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:05.796886', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.12141808867454529, 'timestamp': '2025-09-30 22:36:05.810587', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:05.869623', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.16797256469726562, 'timestamp': '2025-09-30 22:36:05.871806', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:05.934532', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.0509670153260231, 'timestamp': '2025-09-30 22:36:05.939479', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.004833', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.0738542303442955, 'timestamp': '2025-09-30 22:36:06.014878', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.085946', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.13720951974391937, 'timestamp': '2025-09-30 22:36:06.095938', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:06.163499', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.10992635786533356, 'timestamp': '2025-09-30 22:36:06.166433', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:06.240687', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.09263954311609268, 'timestamp': '2025-09-30 22:36:06.246457', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:06.305589', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.13813789188861847, 'timestamp': '2025-09-30 22:36:06.308181', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:06.366773', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.05198434740304947, 'timestamp': '2025-09-30 22:36:06.380366', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.438911', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.1443943977355957, 'timestamp': '2025-09-30 22:36:06.442410', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.503910', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.1483355313539505, 'timestamp': '2025-09-30 22:36:06.507351', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.568630', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.1493116170167923, 'timestamp': '2025-09-30 22:36:06.580322', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:06.653280', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.035584431141614914, 'timestamp': '2025-09-30 22:36:06.662147', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.722460', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.1382455825805664, 'timestamp': '2025-09-30 22:36:06.726366', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:06.785980', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.038015492260456085, 'timestamp': '2025-09-30 22:36:06.788927', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:06.857216', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.23946218192577362, 'timestamp': '2025-09-30 22:36:06.860169', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:06.922589', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.12778310477733612, 'timestamp': '2025-09-30 22:36:06.940130', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:07.007020', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.17618580162525177, 'timestamp': '2025-09-30 22:36:07.015317', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.079992', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.15949223935604095, 'timestamp': '2025-09-30 22:36:07.088218', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.147979', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.04527444764971733, 'timestamp': '2025-09-30 22:36:07.151178', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.209756', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.1066669151186943, 'timestamp': '2025-09-30 22:36:07.217478', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:07.275922', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.14385823905467987, 'timestamp': '2025-09-30 22:36:07.279064', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:07.341447', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.10800199210643768, 'timestamp': '2025-09-30 22:36:07.349179', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:07.414126', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.07305809110403061, 'timestamp': '2025-09-30 22:36:07.421918', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.486294', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.0888916477560997, 'timestamp': '2025-09-30 22:36:07.496082', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.555116', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.1769610047340393, 'timestamp': '2025-09-30 22:36:07.558130', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:07.615569', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.06476469337940216, 'timestamp': '2025-09-30 22:36:07.624923', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.690379', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.08681140840053558, 'timestamp': '2025-09-30 22:36:07.694564', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:07.752612', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.043568238615989685, 'timestamp': '2025-09-30 22:36:07.769842', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:07.827148', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.13107044994831085, 'timestamp': '2025-09-30 22:36:07.830742', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:07.903361', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.09835633635520935, 'timestamp': '2025-09-30 22:36:07.907642', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:07.973053', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.09517074376344681, 'timestamp': '2025-09-30 22:36:07.978698', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.046604', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.16964787244796753, 'timestamp': '2025-09-30 22:36:08.055013', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.114104', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.14068326354026794, 'timestamp': '2025-09-30 22:36:08.117022', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:08.183055', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.06592366844415665, 'timestamp': '2025-09-30 22:36:08.186453', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.245162', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.08655968308448792, 'timestamp': '2025-09-30 22:36:08.260624', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:36:08.320167', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.07760227471590042, 'timestamp': '2025-09-30 22:36:08.326746', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.395427', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.1134176105260849, 'timestamp': '2025-09-30 22:36:08.399225', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:08.459262', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.0796208456158638, 'timestamp': '2025-09-30 22:36:08.463029', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.527082', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.0821833685040474, 'timestamp': '2025-09-30 22:36:08.532010', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:08.600519', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.13246625661849976, 'timestamp': '2025-09-30 22:36:08.615200', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:08.674669', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.08418643474578857, 'timestamp': '2025-09-30 22:36:08.677661', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:08.779059', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.05833560973405838, 'timestamp': '2025-09-30 22:36:08.790753', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:08.883075', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.16385413706302643, 'timestamp': '2025-09-30 22:36:08.886172', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:08.979704', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.13404810428619385, 'timestamp': '2025-09-30 22:36:08.990908', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:09.083639', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.044837385416030884, 'timestamp': '2025-09-30 22:36:09.094346', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:09.160520', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.061933912336826324, 'timestamp': '2025-09-30 22:36:09.164842', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:09.228207', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.08801531046628952, 'timestamp': '2025-09-30 22:36:09.237783', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:09.342387', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.07305354624986649, 'timestamp': '2025-09-30 22:36:09.356933', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:09.432608', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.11512972414493561, 'timestamp': '2025-09-30 22:36:09.442366', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:09.536915', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.11727196723222733, 'timestamp': '2025-09-30 22:36:09.546357', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:09.611314', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.11138301342725754, 'timestamp': '2025-09-30 22:36:09.614170', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:09.683441', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.13522881269454956, 'timestamp': '2025-09-30 22:36:09.689790', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:09.749662', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.029011838138103485, 'timestamp': '2025-09-30 22:36:09.753299', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:09.823937', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.078245609998703, 'timestamp': '2025-09-30 22:36:09.827023', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:09.890900', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.06913471966981888, 'timestamp': '2025-09-30 22:36:09.893662', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:09.968756', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.05431096628308296, 'timestamp': '2025-09-30 22:36:09.977918', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:10.051891', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.1208585873246193, 'timestamp': '2025-09-30 22:36:10.056832', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:10.116190', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.21119429171085358, 'timestamp': '2025-09-30 22:36:10.118999', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:10.188788', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.2201073169708252, 'timestamp': '2025-09-30 22:36:10.192010', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:10.249497', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.21733419597148895, 'timestamp': '2025-09-30 22:36:10.256163', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:10.314711', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.17792551219463348, 'timestamp': '2025-09-30 22:36:10.317091', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:10.375334', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.09063124656677246, 'timestamp': '2025-09-30 22:36:10.386112', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:10.442842', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.05522489547729492, 'timestamp': '2025-09-30 22:36:10.452944', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:10.510461', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.18656432628631592, 'timestamp': '2025-09-30 22:36:10.520485', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:10.579816', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.21958820521831512, 'timestamp': '2025-09-30 22:36:10.582391', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:10.640283', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.15905603766441345, 'timestamp': '2025-09-30 22:36:10.643543', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:10.714421', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.12893296778202057, 'timestamp': '2025-09-30 22:36:10.718028', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:10.786746', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.0603458546102047, 'timestamp': '2025-09-30 22:36:10.794694', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:10.852629', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.11344610899686813, 'timestamp': '2025-09-30 22:36:10.856289', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:10.915542', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.10013919323682785, 'timestamp': '2025-09-30 22:36:10.920186', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:10.979377', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.09389234334230423, 'timestamp': '2025-09-30 22:36:10.991928', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:11.049998', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.14114533364772797, 'timestamp': '2025-09-30 22:36:11.056856', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:11.119351', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.12641286849975586, 'timestamp': '2025-09-30 22:36:11.122112', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:11.183961', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.12248796224594116, 'timestamp': '2025-09-30 22:36:11.189175', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:11.250775', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.08941728621721268, 'timestamp': '2025-09-30 22:36:11.254454', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:11.314590', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.14653313159942627, 'timestamp': '2025-09-30 22:36:11.322202', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:36:11.392516', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.07680313289165497, 'timestamp': '2025-09-30 22:36:11.405789', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:11.471017', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.16344407200813293, 'timestamp': '2025-09-30 22:36:11.473891', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:11.551377', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.11302007734775543, 'timestamp': '2025-09-30 22:36:11.558065', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:11.618956', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.08224672824144363, 'timestamp': '2025-09-30 22:36:11.627537', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:11.688835', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.11713120341300964, 'timestamp': '2025-09-30 22:36:11.693474', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:11.752512', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.1192990243434906, 'timestamp': '2025-09-30 22:36:11.755559', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:11.823659', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.16726060211658478, 'timestamp': '2025-09-30 22:36:11.829634', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:11.925187', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.09257682412862778, 'timestamp': '2025-09-30 22:36:11.935653', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.009382', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.06653010845184326, 'timestamp': '2025-09-30 22:36:12.012199', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.077768', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.12747201323509216, 'timestamp': '2025-09-30 22:36:12.081654', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:12.141565', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.05376565083861351, 'timestamp': '2025-09-30 22:36:12.145029', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:12.207123', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.06895159929990768, 'timestamp': '2025-09-30 22:36:12.214893', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:12.275128', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.09793562442064285, 'timestamp': '2025-09-30 22:36:12.278513', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:12.352348', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.0859973207116127, 'timestamp': '2025-09-30 22:36:12.358819', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.422788', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.11843418329954147, 'timestamp': '2025-09-30 22:36:12.429421', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.492427', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.10933732986450195, 'timestamp': '2025-09-30 22:36:12.500247', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:12.560048', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.17853647470474243, 'timestamp': '2025-09-30 22:36:12.562644', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.626649', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.10880391299724579, 'timestamp': '2025-09-30 22:36:12.630375', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:12.690674', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.08276589214801788, 'timestamp': '2025-09-30 22:36:12.693050', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:12.752195', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.05794043093919754, 'timestamp': '2025-09-30 22:36:12.759508', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:12.818860', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.08566848188638687, 'timestamp': '2025-09-30 22:36:12.828700', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:12.887127', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.20647045969963074, 'timestamp': '2025-09-30 22:36:12.889510', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:12.952136', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.08902419358491898, 'timestamp': '2025-09-30 22:36:12.955937', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:13.021807', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.13543376326560974, 'timestamp': '2025-09-30 22:36:13.028542', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:13.086845', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.08998048305511475, 'timestamp': '2025-09-30 22:36:13.090439', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:13.149159', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.0645003691315651, 'timestamp': '2025-09-30 22:36:13.157050', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:13.214927', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.16435851156711578, 'timestamp': '2025-09-30 22:36:13.225827', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:13.288420', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.1458008885383606, 'timestamp': '2025-09-30 22:36:13.304278', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:13.361375', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.05799331143498421, 'timestamp': '2025-09-30 22:36:13.367844', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:13.426847', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.12521898746490479, 'timestamp': '2025-09-30 22:36:13.429935', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:13.489990', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.04897945001721382, 'timestamp': '2025-09-30 22:36:13.498926', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:13.557069', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.08782254904508591, 'timestamp': '2025-09-30 22:36:13.563803', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:13.621786', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.17592349648475647, 'timestamp': '2025-09-30 22:36:13.624445', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:13.682756', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.07929584383964539, 'timestamp': '2025-09-30 22:36:13.685666', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:13.744587', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.08717632293701172, 'timestamp': '2025-09-30 22:36:13.748195', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:13.811584', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.09471473097801208, 'timestamp': '2025-09-30 22:36:13.825108', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:13.897606', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.10244657844305038, 'timestamp': '2025-09-30 22:36:13.902818', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:13.972111', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.11296636611223221, 'timestamp': '2025-09-30 22:36:13.975873', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:14.033846', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.10807965695858002, 'timestamp': '2025-09-30 22:36:14.036523', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:14.093705', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.09478669613599777, 'timestamp': '2025-09-30 22:36:14.108757', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:14.169551', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.13746215403079987, 'timestamp': '2025-09-30 22:36:14.172971', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:14.237968', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.15670213103294373, 'timestamp': '2025-09-30 22:36:14.247701', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:14.310633', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.21502023935317993, 'timestamp': '2025-09-30 22:36:14.313746', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:14.371140', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.05380832403898239, 'timestamp': '2025-09-30 22:36:14.377857', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:14.440804', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.1452985256910324, 'timestamp': '2025-09-30 22:36:14.443976', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:14.515457', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.07644431293010712, 'timestamp': '2025-09-30 22:36:14.518005', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:14.580522', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.09725809842348099, 'timestamp': '2025-09-30 22:36:14.589350', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:14.648045', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.10925928503274918, 'timestamp': '2025-09-30 22:36:14.660957', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:14.725138', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.06545286625623703, 'timestamp': '2025-09-30 22:36:14.728035', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:14.785742', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.061443932354450226, 'timestamp': '2025-09-30 22:36:14.788894', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:14.847661', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.032959844917058945, 'timestamp': '2025-09-30 22:36:14.850259', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:14.906571', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.22233721613883972, 'timestamp': '2025-09-30 22:36:14.913053', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:14.973374', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.13968105614185333, 'timestamp': '2025-09-30 22:36:14.976278', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:15.034129', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.047090619802474976, 'timestamp': '2025-09-30 22:36:15.038910', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.103690', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.06686492264270782, 'timestamp': '2025-09-30 22:36:15.108032', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:15.166474', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.16389678418636322, 'timestamp': '2025-09-30 22:36:15.173765', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.240820', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.05902733653783798, 'timestamp': '2025-09-30 22:36:15.248952', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:15.306757', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.0753542110323906, 'timestamp': '2025-09-30 22:36:15.314956', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:15.380496', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.04270832985639572, 'timestamp': '2025-09-30 22:36:15.389601', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.448378', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.07551620900630951, 'timestamp': '2025-09-30 22:36:15.460202', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:15.518250', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.16350522637367249, 'timestamp': '2025-09-30 22:36:15.529770', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:15.596655', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.07367586344480515, 'timestamp': '2025-09-30 22:36:15.601369', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:15.666897', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.23239882290363312, 'timestamp': '2025-09-30 22:36:15.672770', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:15.738660', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.06927359849214554, 'timestamp': '2025-09-30 22:36:15.746285', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.803608', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.12638376653194427, 'timestamp': '2025-09-30 22:36:15.806775', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:15.864531', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.050424974411726, 'timestamp': '2025-09-30 22:36:15.867702', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.924929', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.13081671297550201, 'timestamp': '2025-09-30 22:36:15.927800', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:15.986408', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.14865338802337646, 'timestamp': '2025-09-30 22:36:15.993463', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:16.064121', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.07370680570602417, 'timestamp': '2025-09-30 22:36:16.067714', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.126087', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.1071314811706543, 'timestamp': '2025-09-30 22:36:16.130024', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.192841', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.10254988074302673, 'timestamp': '2025-09-30 22:36:16.198851', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.256584', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.09364695101976395, 'timestamp': '2025-09-30 22:36:16.265072', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:16.327843', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.1235848143696785, 'timestamp': '2025-09-30 22:36:16.331285', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:16.390975', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.05809362232685089, 'timestamp': '2025-09-30 22:36:16.394326', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:16.454651', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.1412702053785324, 'timestamp': '2025-09-30 22:36:16.457917', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:16.524913', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.18656805157661438, 'timestamp': '2025-09-30 22:36:16.531563', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.588356', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.15458306670188904, 'timestamp': '2025-09-30 22:36:16.597317', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:16.655266', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.20296353101730347, 'timestamp': '2025-09-30 22:36:16.659414', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:16.723082', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.057412732392549515, 'timestamp': '2025-09-30 22:36:16.725906', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.787527', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.10147111862897873, 'timestamp': '2025-09-30 22:36:16.793800', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:16.850208', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.092384472489357, 'timestamp': '2025-09-30 22:36:16.853691', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:16.910500', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.0506327822804451, 'timestamp': '2025-09-30 22:36:16.915355', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:16.983650', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.03364506736397743, 'timestamp': '2025-09-30 22:36:16.993199', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:17.055430', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.158791646361351, 'timestamp': '2025-09-30 22:36:17.061668', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:17.124519', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.13508419692516327, 'timestamp': '2025-09-30 22:36:17.129112', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:17.189413', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.09061278402805328, 'timestamp': '2025-09-30 22:36:17.192027', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:17.254992', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.12512676417827606, 'timestamp': '2025-09-30 22:36:17.260445', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:17.322530', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.09902232885360718, 'timestamp': '2025-09-30 22:36:17.328646', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:17.388434', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.10803916305303574, 'timestamp': '2025-09-30 22:36:17.398917', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:17.456895', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.08247970044612885, 'timestamp': '2025-09-30 22:36:17.468044', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:17.526135', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.18656030297279358, 'timestamp': '2025-09-30 22:36:17.530030', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:17.598947', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.09498989582061768, 'timestamp': '2025-09-30 22:36:17.607414', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:17.670851', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.09912674874067307, 'timestamp': '2025-09-30 22:36:17.681780', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:17.740074', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.21314381062984467, 'timestamp': '2025-09-30 22:36:17.749766', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:17.808613', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.08949454873800278, 'timestamp': '2025-09-30 22:36:17.816340', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:17.882863', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.142811581492424, 'timestamp': '2025-09-30 22:36:17.890024', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:17.949352', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.1235528439283371, 'timestamp': '2025-09-30 22:36:17.953047', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:18.020070', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.10661772638559341, 'timestamp': '2025-09-30 22:36:18.031416', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:18.089413', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.1340596079826355, 'timestamp': '2025-09-30 22:36:18.093117', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.151646', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.06494161486625671, 'timestamp': '2025-09-30 22:36:18.158036', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.213766', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.15090994536876678, 'timestamp': '2025-09-30 22:36:18.216038', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:18.277118', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.09810706973075867, 'timestamp': '2025-09-30 22:36:18.280506', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.340573', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.04721473902463913, 'timestamp': '2025-09-30 22:36:18.348133', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:18.417157', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.09764932841062546, 'timestamp': '2025-09-30 22:36:18.426367', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:18.483289', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.08753033727407455, 'timestamp': '2025-09-30 22:36:18.486127', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.546157', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.1243971437215805, 'timestamp': '2025-09-30 22:36:18.549006', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.606477', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.1838952898979187, 'timestamp': '2025-09-30 22:36:18.618467', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:18.683896', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.11191626638174057, 'timestamp': '2025-09-30 22:36:18.690714', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.757916', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.09472772479057312, 'timestamp': '2025-09-30 22:36:18.761047', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:18.819051', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.0580458901822567, 'timestamp': '2025-09-30 22:36:18.823540', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:18.880644', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.04658007249236107, 'timestamp': '2025-09-30 22:36:18.883781', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:18.943604', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.055837199091911316, 'timestamp': '2025-09-30 22:36:18.951384', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:19.024272', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.04390500858426094, 'timestamp': '2025-09-30 22:36:19.027845', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:19.086007', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.07936523109674454, 'timestamp': '2025-09-30 22:36:19.094598', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:19.158481', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.06279578804969788, 'timestamp': '2025-09-30 22:36:19.161371', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:19.223485', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.11118617653846741, 'timestamp': '2025-09-30 22:36:19.230097', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:19.286837', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.05772749334573746, 'timestamp': '2025-09-30 22:36:19.290036', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:19.354194', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.14886215329170227, 'timestamp': '2025-09-30 22:36:19.358854', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:19.416986', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.09741947799921036, 'timestamp': '2025-09-30 22:36:19.421307', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:19.485304', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.10290030390024185, 'timestamp': '2025-09-30 22:36:19.492878', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:19.549650', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.07244466990232468, 'timestamp': '2025-09-30 22:36:19.553572', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:19.611980', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.11783367395401001, 'timestamp': '2025-09-30 22:36:19.620331', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:19.679053', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.17592847347259521, 'timestamp': '2025-09-30 22:36:19.684243', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:19.742857', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.1796686202287674, 'timestamp': '2025-09-30 22:36:19.763214', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:19.820205', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.07522427290678024, 'timestamp': '2025-09-30 22:36:19.823433', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:19.884665', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.01913979835808277, 'timestamp': '2025-09-30 22:36:19.888033', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:19.946482', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.07541253417730331, 'timestamp': '2025-09-30 22:36:19.949948', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.008457', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.07336626201868057, 'timestamp': '2025-09-30 22:36:20.016322', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:20.073855', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.062088944017887115, 'timestamp': '2025-09-30 22:36:20.077898', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:20.139845', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.05930732935667038, 'timestamp': '2025-09-30 22:36:20.149141', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.213133', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.1518850177526474, 'timestamp': '2025-09-30 22:36:20.221870', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:20.283665', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.07246659696102142, 'timestamp': '2025-09-30 22:36:20.289668', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:20.346056', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.159174844622612, 'timestamp': '2025-09-30 22:36:20.349151', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:20.411079', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.12875929474830627, 'timestamp': '2025-09-30 22:36:20.414969', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:20.474259', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.04537466913461685, 'timestamp': '2025-09-30 22:36:20.477626', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:20.535458', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.1345638930797577, 'timestamp': '2025-09-30 22:36:20.542364', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:20.606999', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.08139295876026154, 'timestamp': '2025-09-30 22:36:20.609944', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.667498', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.0389295294880867, 'timestamp': '2025-09-30 22:36:20.670753', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.729152', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.11045621335506439, 'timestamp': '2025-09-30 22:36:20.732362', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.796531', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.1617581993341446, 'timestamp': '2025-09-30 22:36:20.807220', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.865571', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.127837672829628, 'timestamp': '2025-09-30 22:36:20.869116', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:20.927366', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.13096198439598083, 'timestamp': '2025-09-30 22:36:20.932118', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:20.996890', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.14118237793445587, 'timestamp': '2025-09-30 22:36:21.000402', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:21.063175', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.09436507523059845, 'timestamp': '2025-09-30 22:36:21.076627', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:21.134955', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.08692873269319534, 'timestamp': '2025-09-30 22:36:21.146810', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:21.204701', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.08477263152599335, 'timestamp': '2025-09-30 22:36:21.208360', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:21.266143', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.11999887973070145, 'timestamp': '2025-09-30 22:36:21.268934', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:21.331342', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.069831982254982, 'timestamp': '2025-09-30 22:36:21.345141', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:21.406219', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.1417265683412552, 'timestamp': '2025-09-30 22:36:21.409540', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:21.470858', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.1413550078868866, 'timestamp': '2025-09-30 22:36:21.473389', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:21.537279', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.1597946137189865, 'timestamp': '2025-09-30 22:36:21.540097', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:21.603225', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.07399414479732513, 'timestamp': '2025-09-30 22:36:21.610302', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:21.673709', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.12200115621089935, 'timestamp': '2025-09-30 22:36:21.677393', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:21.734698', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.06272896379232407, 'timestamp': '2025-09-30 22:36:21.737763', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:36:35.439908', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 10932.349263911197, 'timestamp': '2025-09-30 22:36:35.443789', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:35.513986', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.08539362251758575, 'timestamp': '2025-09-30 22:36:35.522912', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:35.581050', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.19315205514431, 'timestamp': '2025-09-30 22:36:35.588042', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:35.644472', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.11109531670808792, 'timestamp': '2025-09-30 22:36:35.647163', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:35.704068', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.15977652370929718, 'timestamp': '2025-09-30 22:36:35.710043', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:35.774397', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.037393469363451004, 'timestamp': '2025-09-30 22:36:35.777656', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:35.856820', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.11517797410488129, 'timestamp': '2025-09-30 22:36:35.868040', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:35.929846', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.0669010728597641, 'timestamp': '2025-09-30 22:36:35.933359', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:35.996878', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.08563103526830673, 'timestamp': '2025-09-30 22:36:36.001730', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.066644', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.05231088772416115, 'timestamp': '2025-09-30 22:36:36.077910', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.149757', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.11731525510549545, 'timestamp': '2025-09-30 22:36:36.160208', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.225760', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.11638081073760986, 'timestamp': '2025-09-30 22:36:36.231629', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.293468', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.05822761729359627, 'timestamp': '2025-09-30 22:36:36.301265', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:36.368656', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.15796412527561188, 'timestamp': '2025-09-30 22:36:36.383101', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.450821', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.09881147742271423, 'timestamp': '2025-09-30 22:36:36.462910', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:36.534418', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.057542603462934494, 'timestamp': '2025-09-30 22:36:36.539908', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:36.603314', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.02860165573656559, 'timestamp': '2025-09-30 22:36:36.607933', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:36.670800', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.17579209804534912, 'timestamp': '2025-09-30 22:36:36.691280', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.756399', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.056271471083164215, 'timestamp': '2025-09-30 22:36:36.770729', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:36.837464', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.10535577684640884, 'timestamp': '2025-09-30 22:36:36.848117', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:36.909386', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.08071210235357285, 'timestamp': '2025-09-30 22:36:36.914051', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:36.975408', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.10070116817951202, 'timestamp': '2025-09-30 22:36:36.983266', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:37.061090', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.22190552949905396, 'timestamp': '2025-09-30 22:36:37.077569', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:37.155201', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.14066831767559052, 'timestamp': '2025-09-30 22:36:37.159939', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.229117', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.13241669535636902, 'timestamp': '2025-09-30 22:36:37.238558', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:37.306512', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.0918552428483963, 'timestamp': '2025-09-30 22:36:37.318533', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:37.399565', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.1323668509721756, 'timestamp': '2025-09-30 22:36:37.407409', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:37.474395', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.08251692354679108, 'timestamp': '2025-09-30 22:36:37.477276', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.535052', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.06379105895757675, 'timestamp': '2025-09-30 22:36:37.545602', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:37.602672', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.026463650166988373, 'timestamp': '2025-09-30 22:36:37.610303', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.673061', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.13033461570739746, 'timestamp': '2025-09-30 22:36:37.679616', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:37.737294', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.1018168106675148, 'timestamp': '2025-09-30 22:36:37.739937', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.797683', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.11276774108409882, 'timestamp': '2025-09-30 22:36:37.800419', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:37.858163', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.1830003261566162, 'timestamp': '2025-09-30 22:36:37.877975', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.936959', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.06595680862665176, 'timestamp': '2025-09-30 22:36:37.942934', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:37.999242', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.07613711059093475, 'timestamp': '2025-09-30 22:36:38.002556', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:38.071957', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.13056157529354095, 'timestamp': '2025-09-30 22:36:38.086525', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:38.145231', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.14265762269496918, 'timestamp': '2025-09-30 22:36:38.147840', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:38.212599', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.1775706261396408, 'timestamp': '2025-09-30 22:36:38.218614', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:38.275165', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.10296101868152618, 'timestamp': '2025-09-30 22:36:38.283977', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:38.345138', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.08623086661100388, 'timestamp': '2025-09-30 22:36:38.349377', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:38.413457', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.051016464829444885, 'timestamp': '2025-09-30 22:36:38.422205', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:38.496237', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.04468134045600891, 'timestamp': '2025-09-30 22:36:38.507751', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:38.572591', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.03799910098314285, 'timestamp': '2025-09-30 22:36:38.586171', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:38.642039', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.1300821751356125, 'timestamp': '2025-09-30 22:36:38.647023', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:38.711030', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.08618252724409103, 'timestamp': '2025-09-30 22:36:38.713389', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:38.770771', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.08150497823953629, 'timestamp': '2025-09-30 22:36:38.776793', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:38.833565', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.06466546654701233, 'timestamp': '2025-09-30 22:36:38.838523', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:38.896604', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.10197416692972183, 'timestamp': '2025-09-30 22:36:38.900007', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:38.956954', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.12394989281892776, 'timestamp': '2025-09-30 22:36:38.971739', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.029750', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.07216091454029083, 'timestamp': '2025-09-30 22:36:39.035967', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:39.092052', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.09634222090244293, 'timestamp': '2025-09-30 22:36:39.097757', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.154805', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.05950389802455902, 'timestamp': '2025-09-30 22:36:39.157686', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:39.219066', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.03533746674656868, 'timestamp': '2025-09-30 22:36:39.228373', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.289363', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.07073134928941727, 'timestamp': '2025-09-30 22:36:39.297548', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.354976', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.16502784192562103, 'timestamp': '2025-09-30 22:36:39.361530', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:39.423436', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.14076805114746094, 'timestamp': '2025-09-30 22:36:39.427151', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:39.484648', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.07145881652832031, 'timestamp': '2025-09-30 22:36:39.487748', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:39.555647', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.07833801954984665, 'timestamp': '2025-09-30 22:36:39.562270', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.630547', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.1451120674610138, 'timestamp': '2025-09-30 22:36:39.638134', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:39.698123', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.0741616040468216, 'timestamp': '2025-09-30 22:36:39.701105', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:39.766955', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.162648543715477, 'timestamp': '2025-09-30 22:36:39.773528', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:39.836947', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.10548274219036102, 'timestamp': '2025-09-30 22:36:39.850991', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:39.910301', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.0956348106265068, 'timestamp': '2025-09-30 22:36:39.919208', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:39.984441', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.2356310784816742, 'timestamp': '2025-09-30 22:36:39.990414', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:40.048376', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.1171623170375824, 'timestamp': '2025-09-30 22:36:40.051107', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:40.111979', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.012570168823003769, 'timestamp': '2025-09-30 22:36:40.118641', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:40.174784', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.05715107172727585, 'timestamp': '2025-09-30 22:36:40.177635', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:40.234863', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.1509568989276886, 'timestamp': '2025-09-30 22:36:40.239042', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:40.301830', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.11850185692310333, 'timestamp': '2025-09-30 22:36:40.318177', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:40.381938', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.13434743881225586, 'timestamp': '2025-09-30 22:36:40.394569', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:40.454208', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.08553463220596313, 'timestamp': '2025-09-30 22:36:40.457235', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:40.521205', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.10614388436079025, 'timestamp': '2025-09-30 22:36:40.525243', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:40.582341', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.10683058947324753, 'timestamp': '2025-09-30 22:36:40.586280', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:40.643840', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.04232184588909149, 'timestamp': '2025-09-30 22:36:40.656055', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:36:40.711961', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.10457367449998856, 'timestamp': '2025-09-30 22:36:40.721097', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:40.792374', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.0913853794336319, 'timestamp': '2025-09-30 22:36:40.803770', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:40.863746', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.1423983871936798, 'timestamp': '2025-09-30 22:36:40.871450', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:40.934930', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.150801882147789, 'timestamp': '2025-09-30 22:36:40.944385', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.003573', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.05695275217294693, 'timestamp': '2025-09-30 22:36:41.006166', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.063440', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.059716466814279556, 'timestamp': '2025-09-30 22:36:41.067455', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.126761', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.19212110340595245, 'timestamp': '2025-09-30 22:36:41.130306', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:41.194986', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.10022146254777908, 'timestamp': '2025-09-30 22:36:41.209054', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.276307', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.08877173066139221, 'timestamp': '2025-09-30 22:36:41.279641', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.338271', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.11789695173501968, 'timestamp': '2025-09-30 22:36:41.341342', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.398587', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.05103441700339317, 'timestamp': '2025-09-30 22:36:41.408296', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:41.465601', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.057219862937927246, 'timestamp': '2025-09-30 22:36:41.472623', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:41.529418', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.11031213402748108, 'timestamp': '2025-09-30 22:36:41.532206', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.589959', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.11080172657966614, 'timestamp': '2025-09-30 22:36:41.592533', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:41.656600', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.09780693054199219, 'timestamp': '2025-09-30 22:36:41.672222', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:41.735100', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.17002934217453003, 'timestamp': '2025-09-30 22:36:41.749148', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:41.809386', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.12740424275398254, 'timestamp': '2025-09-30 22:36:41.812629', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.869565', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.05836477130651474, 'timestamp': '2025-09-30 22:36:41.873474', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:41.931252', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.05799959972500801, 'timestamp': '2025-09-30 22:36:41.934547', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.002127', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.16835391521453857, 'timestamp': '2025-09-30 22:36:42.008614', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:42.073350', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.1294320970773697, 'timestamp': '2025-09-30 22:36:42.076172', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:42.142623', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.10418889671564102, 'timestamp': '2025-09-30 22:36:42.146969', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.206828', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.10446509718894958, 'timestamp': '2025-09-30 22:36:42.212762', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.271787', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.08666355907917023, 'timestamp': '2025-09-30 22:36:42.279048', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.342455', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.03903118893504143, 'timestamp': '2025-09-30 22:36:42.345621', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:42.404088', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.05593589320778847, 'timestamp': '2025-09-30 22:36:42.412961', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.474118', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.10661900043487549, 'timestamp': '2025-09-30 22:36:42.479350', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:42.537692', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.066144198179245, 'timestamp': '2025-09-30 22:36:42.544485', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.600543', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.10213461518287659, 'timestamp': '2025-09-30 22:36:42.604998', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:42.661701', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.05599886178970337, 'timestamp': '2025-09-30 22:36:42.672154', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:42.731341', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.08556664735078812, 'timestamp': '2025-09-30 22:36:42.735014', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:42.804545', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.19527657330036163, 'timestamp': '2025-09-30 22:36:42.812331', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:36:42.872387', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.16820870339870453, 'timestamp': '2025-09-30 22:36:42.874991', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:42.937824', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.09298904240131378, 'timestamp': '2025-09-30 22:36:42.952568', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:43.013889', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.13029998540878296, 'timestamp': '2025-09-30 22:36:43.017349', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:43.075098', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.09720496088266373, 'timestamp': '2025-09-30 22:36:43.081545', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.139049', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.07894182950258255, 'timestamp': '2025-09-30 22:36:43.141935', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.208498', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.10623395442962646, 'timestamp': '2025-09-30 22:36:43.217687', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:43.284568', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.13978099822998047, 'timestamp': '2025-09-30 22:36:43.294515', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.352175', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.04253821820020676, 'timestamp': '2025-09-30 22:36:43.365160', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.427561', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.05094633996486664, 'timestamp': '2025-09-30 22:36:43.434870', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:43.497237', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.26914188265800476, 'timestamp': '2025-09-30 22:36:43.500982', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.559555', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.035688113421201706, 'timestamp': '2025-09-30 22:36:43.570188', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:43.631563', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.0769178494811058, 'timestamp': '2025-09-30 22:36:43.638628', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:43.698191', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.03007272444665432, 'timestamp': '2025-09-30 22:36:43.701498', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:43.765327', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.1668589860200882, 'timestamp': '2025-09-30 22:36:43.774559', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:43.841554', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.1609543263912201, 'timestamp': '2025-09-30 22:36:43.845091', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:43.903208', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.11588182300329208, 'timestamp': '2025-09-30 22:36:43.920952', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-09-30 22:36:44.341532', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:44.402561', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.05727149918675423, 'timestamp': '2025-09-30 22:36:44.405736', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:44.471350', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.0599757544696331, 'timestamp': '2025-09-30 22:36:44.475506', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:44.534412', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.06267966330051422, 'timestamp': '2025-09-30 22:36:44.537827', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:44.595948', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.1318192034959793, 'timestamp': '2025-09-30 22:36:44.603690', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:44.663120', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.13496506214141846, 'timestamp': '2025-09-30 22:36:44.666798', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:36:44.724637', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.07718508690595627, 'timestamp': '2025-09-30 22:36:44.732135', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:44.793464', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.1295531690120697, 'timestamp': '2025-09-30 22:36:44.796466', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:44.854412', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.09903790056705475, 'timestamp': '2025-09-30 22:36:44.860961', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:44.918073', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.17824463546276093, 'timestamp': '2025-09-30 22:36:44.925143', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:44.982696', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.08211246877908707, 'timestamp': '2025-09-30 22:36:44.985635', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:45.047678', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.10751568526029587, 'timestamp': '2025-09-30 22:36:45.050431', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:36:45.110217', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.12169572710990906, 'timestamp': '2025-09-30 22:36:45.117096', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:45.183923', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.12185875326395035, 'timestamp': '2025-09-30 22:36:45.188322', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:45.245583', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.10089810192584991, 'timestamp': '2025-09-30 22:36:45.253930', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:45.317478', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.07663644850254059, 'timestamp': '2025-09-30 22:36:45.322647', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:45.383782', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.1973041445016861, 'timestamp': '2025-09-30 22:36:45.390100', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:45.449257', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.09449092298746109, 'timestamp': '2025-09-30 22:36:45.456974', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:45.516372', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.11906630545854568, 'timestamp': '2025-09-30 22:36:45.519500', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:45.577197', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.11116104573011398, 'timestamp': '2025-09-30 22:36:45.580071', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:45.649204', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.11182980239391327, 'timestamp': '2025-09-30 22:36:45.655414', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:45.712533', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.04466792568564415, 'timestamp': '2025-09-30 22:36:45.716286', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:45.777902', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.18385230004787445, 'timestamp': '2025-09-30 22:36:45.781271', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:45.840320', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.11702359467744827, 'timestamp': '2025-09-30 22:36:45.845194', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:45.907459', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.09648161381483078, 'timestamp': '2025-09-30 22:36:45.913982', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:45.971296', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.09527240693569183, 'timestamp': '2025-09-30 22:36:45.975842', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:46.033534', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.04699784517288208, 'timestamp': '2025-09-30 22:36:46.037482', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:46.095198', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.06474059075117111, 'timestamp': '2025-09-30 22:36:46.103421', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:46.164220', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.10913866758346558, 'timestamp': '2025-09-30 22:36:46.176417', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:46.233568', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.023970942944288254, 'timestamp': '2025-09-30 22:36:46.247865', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.305858', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.1332704722881317, 'timestamp': '2025-09-30 22:36:46.309853', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.367839', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.12936824560165405, 'timestamp': '2025-09-30 22:36:46.371769', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.430188', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.13635189831256866, 'timestamp': '2025-09-30 22:36:46.438808', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:46.507833', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.09528641402721405, 'timestamp': '2025-09-30 22:36:46.511270', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:46.569139', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.05900014191865921, 'timestamp': '2025-09-30 22:36:46.572443', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.630068', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.19599610567092896, 'timestamp': '2025-09-30 22:36:46.635221', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:46.705426', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.1039363443851471, 'timestamp': '2025-09-30 22:36:46.713480', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.771290', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.15557439625263214, 'timestamp': '2025-09-30 22:36:46.776645', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:46.834565', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.12658719718456268, 'timestamp': '2025-09-30 22:36:46.837997', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:46.894806', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.1343054175376892, 'timestamp': '2025-09-30 22:36:46.898657', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:46.956062', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.1472126990556717, 'timestamp': '2025-09-30 22:36:46.970935', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:47.029192', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.12804822623729706, 'timestamp': '2025-09-30 22:36:47.032420', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:47.089750', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.13540472090244293, 'timestamp': '2025-09-30 22:36:47.092221', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:47.151898', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.12636524438858032, 'timestamp': '2025-09-30 22:36:47.155521', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:47.219420', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.09949322789907455, 'timestamp': '2025-09-30 22:36:47.226335', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:47.286168', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.06781460344791412, 'timestamp': '2025-09-30 22:36:47.297162', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:47.355476', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.14058221876621246, 'timestamp': '2025-09-30 22:36:47.358530', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:47.417226', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.2129736840724945, 'timestamp': '2025-09-30 22:36:47.420699', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:47.485288', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.11494091153144836, 'timestamp': '2025-09-30 22:36:47.498441', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:47.554437', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.15822461247444153, 'timestamp': '2025-09-30 22:36:47.557279', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:47.614874', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.03626910224556923, 'timestamp': '2025-09-30 22:36:47.625448', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:47.684060', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.0804576575756073, 'timestamp': '2025-09-30 22:36:47.690882', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:47.752225', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.0979742780327797, 'timestamp': '2025-09-30 22:36:47.759838', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:47.816215', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.125692680478096, 'timestamp': '2025-09-30 22:36:47.818961', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:47.883973', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.14150340855121613, 'timestamp': '2025-09-30 22:36:47.887255', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:47.954598', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.09868437051773071, 'timestamp': '2025-09-30 22:36:47.957659', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:48.025924', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.11690036207437515, 'timestamp': '2025-09-30 22:36:48.032572', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:48.088229', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.07352624833583832, 'timestamp': '2025-09-30 22:36:48.101660', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:48.160535', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.11312120407819748, 'timestamp': '2025-09-30 22:36:48.171656', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.230846', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.17837698757648468, 'timestamp': '2025-09-30 22:36:48.235273', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:48.298898', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.06301022320985794, 'timestamp': '2025-09-30 22:36:48.306743', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.371544', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.06319545954465866, 'timestamp': '2025-09-30 22:36:48.374984', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.432768', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.03634493052959442, 'timestamp': '2025-09-30 22:36:48.444890', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.509087', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.10990209132432938, 'timestamp': '2025-09-30 22:36:48.512072', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.570532', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.12482999265193939, 'timestamp': '2025-09-30 22:36:48.578419', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:48.643229', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.11793410778045654, 'timestamp': '2025-09-30 22:36:48.648024', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.707369', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.0519816093146801, 'timestamp': '2025-09-30 22:36:48.712613', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:48.777749', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.15482087433338165, 'timestamp': '2025-09-30 22:36:48.788391', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:48.844481', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.12611335515975952, 'timestamp': '2025-09-30 22:36:48.852434', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:48.908643', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.14315782487392426, 'timestamp': '2025-09-30 22:36:48.921727', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:48.979194', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.09610899537801743, 'timestamp': '2025-09-30 22:36:48.983273', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:49.047448', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.17681880295276642, 'timestamp': '2025-09-30 22:36:49.058171', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:49.116768', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.036263927817344666, 'timestamp': '2025-09-30 22:36:49.123400', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:49.180141', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.13426342606544495, 'timestamp': '2025-09-30 22:36:49.183327', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.243746', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.11387991160154343, 'timestamp': '2025-09-30 22:36:49.247537', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:49.305795', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.15411362051963806, 'timestamp': '2025-09-30 22:36:49.309627', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.374236', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.08359234035015106, 'timestamp': '2025-09-30 22:36:49.381718', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:49.438880', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.08747054636478424, 'timestamp': '2025-09-30 22:36:49.442899', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.508060', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.07229067385196686, 'timestamp': '2025-09-30 22:36:49.513845', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:49.571835', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.07579784095287323, 'timestamp': '2025-09-30 22:36:49.574889', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.636677', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.10091588646173477, 'timestamp': '2025-09-30 22:36:49.650362', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.709163', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.13545513153076172, 'timestamp': '2025-09-30 22:36:49.713923', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.773366', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.15304158627986908, 'timestamp': '2025-09-30 22:36:49.776685', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.840441', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.1607634425163269, 'timestamp': '2025-09-30 22:36:49.848449', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.909631', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.10214501619338989, 'timestamp': '2025-09-30 22:36:49.915803', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:49.975711', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.08932102471590042, 'timestamp': '2025-09-30 22:36:49.979059', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:50.036682', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.07418214529752731, 'timestamp': '2025-09-30 22:36:50.039772', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:50.098157', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.159184992313385, 'timestamp': '2025-09-30 22:36:50.102166', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:50.160961', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.12072542309761047, 'timestamp': '2025-09-30 22:36:50.170061', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:50.228782', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.2384043186903, 'timestamp': '2025-09-30 22:36:50.234221', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:36:50.292142', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.09018337726593018, 'timestamp': '2025-09-30 22:36:50.297002', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:50.354452', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.09038851410150528, 'timestamp': '2025-09-30 22:36:50.364409', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:50.422340', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.05233994126319885, 'timestamp': '2025-09-30 22:36:50.429825', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:50.485938', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.10598062723875046, 'timestamp': '2025-09-30 22:36:50.488513', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:50.548459', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.12603150308132172, 'timestamp': '2025-09-30 22:36:50.551754', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:50.617470', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.15824125707149506, 'timestamp': '2025-09-30 22:36:50.620922', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:50.678260', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.048286259174346924, 'timestamp': '2025-09-30 22:36:50.684984', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:50.746495', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.11343664675951004, 'timestamp': '2025-09-30 22:36:50.749751', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:50.807725', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.0960024744272232, 'timestamp': '2025-09-30 22:36:50.810927', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:50.868526', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.12207141518592834, 'timestamp': '2025-09-30 22:36:50.873367', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:50.930759', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.09775427728891373, 'timestamp': '2025-09-30 22:36:50.937825', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.006533', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.10695359110832214, 'timestamp': '2025-09-30 22:36:51.010209', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:51.067945', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.18782100081443787, 'timestamp': '2025-09-30 22:36:51.072260', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.138917', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.1133313849568367, 'timestamp': '2025-09-30 22:36:51.143976', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.203997', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.0722234845161438, 'timestamp': '2025-09-30 22:36:51.211234', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.270849', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.12283332645893097, 'timestamp': '2025-09-30 22:36:51.274627', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:51.334320', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.11372756958007812, 'timestamp': '2025-09-30 22:36:51.351856', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.411012', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.03769766166806221, 'timestamp': '2025-09-30 22:36:51.421302', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:51.478885', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.08790235966444016, 'timestamp': '2025-09-30 22:36:51.485840', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.543559', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.12450119853019714, 'timestamp': '2025-09-30 22:36:51.548094', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:51.605763', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.14663740992546082, 'timestamp': '2025-09-30 22:36:51.609899', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:51.669275', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.10025227069854736, 'timestamp': '2025-09-30 22:36:51.673636', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:51.732132', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.1188662052154541, 'timestamp': '2025-09-30 22:36:51.745608', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:51.812884', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.13970671594142914, 'timestamp': '2025-09-30 22:36:51.815749', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:51.871854', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.22233881056308746, 'timestamp': '2025-09-30 22:36:51.887483', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:51.944117', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.16207346320152283, 'timestamp': '2025-09-30 22:36:51.946765', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:52.012380', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.1012551486492157, 'timestamp': '2025-09-30 22:36:52.018831', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.076838', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.11363974213600159, 'timestamp': '2025-09-30 22:36:52.087974', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:52.149193', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.11080259084701538, 'timestamp': '2025-09-30 22:36:52.153771', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.218057', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.10286736488342285, 'timestamp': '2025-09-30 22:36:52.221322', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.289193', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.048545561730861664, 'timestamp': '2025-09-30 22:36:52.296850', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:52.354263', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.07739929854869843, 'timestamp': '2025-09-30 22:36:52.357540', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:52.424059', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.035996995866298676, 'timestamp': '2025-09-30 22:36:52.427371', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.493817', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.13192035257816315, 'timestamp': '2025-09-30 22:36:52.511104', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:52.567655', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.19090670347213745, 'timestamp': '2025-09-30 22:36:52.585905', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:52.643344', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.10426144301891327, 'timestamp': '2025-09-30 22:36:52.647974', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.706114', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.13248485326766968, 'timestamp': '2025-09-30 22:36:52.709534', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:52.767823', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.20584869384765625, 'timestamp': '2025-09-30 22:36:52.770732', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.836703', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.08117847889661789, 'timestamp': '2025-09-30 22:36:52.851329', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:52.911522', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.06600448489189148, 'timestamp': '2025-09-30 22:36:52.919579', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:52.977599', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.09517233073711395, 'timestamp': '2025-09-30 22:36:52.981549', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:53.039798', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.11243662983179092, 'timestamp': '2025-09-30 22:36:53.043891', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:53.110643', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.080530546605587, 'timestamp': '2025-09-30 22:36:53.118731', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:53.177467', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.09213066101074219, 'timestamp': '2025-09-30 22:36:53.183747', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:53.243101', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.13903988897800446, 'timestamp': '2025-09-30 22:36:53.249963', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:53.309214', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.0844036191701889, 'timestamp': '2025-09-30 22:36:53.313754', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:53.379596', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.07705888897180557, 'timestamp': '2025-09-30 22:36:53.396390', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:53.456766', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.04131075739860535, 'timestamp': '2025-09-30 22:36:53.470606', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:53.529154', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.1245761290192604, 'timestamp': '2025-09-30 22:36:53.534415', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:53.603210', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.14975404739379883, 'timestamp': '2025-09-30 22:36:53.606440', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:53.663901', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.05451066046953201, 'timestamp': '2025-09-30 22:36:53.680801', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:53.752643', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.06338238716125488, 'timestamp': '2025-09-30 22:36:53.756744', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:53.815144', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.11669889092445374, 'timestamp': '2025-09-30 22:36:53.819884', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:53.881385', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.1074162945151329, 'timestamp': '2025-09-30 22:36:53.894366', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:53.965128', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.030175931751728058, 'timestamp': '2025-09-30 22:36:53.984031', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:54.042337', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.11810436099767685, 'timestamp': '2025-09-30 22:36:54.055583', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:54.115119', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.11003098636865616, 'timestamp': '2025-09-30 22:36:54.119433', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:54.179832', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.1096692904829979, 'timestamp': '2025-09-30 22:36:54.193468', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:54.252163', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.05539004132151604, 'timestamp': '2025-09-30 22:36:54.259388', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:54.317321', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.11124413460493088, 'timestamp': '2025-09-30 22:36:54.320935', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:54.391233', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.09484831988811493, 'timestamp': '2025-09-30 22:36:54.393960', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:54.451157', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.11311804503202438, 'timestamp': '2025-09-30 22:36:54.454760', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:54.511503', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.07946249842643738, 'timestamp': '2025-09-30 22:36:54.533401', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:54.598209', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.13606351613998413, 'timestamp': '2025-09-30 22:36:54.601343', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:54.666175', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.14594288170337677, 'timestamp': '2025-09-30 22:36:54.678949', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:54.745534', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.17268261313438416, 'timestamp': '2025-09-30 22:36:54.750555', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:54.807604', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.04411415383219719, 'timestamp': '2025-09-30 22:36:54.815476', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:54.871479', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.08461884409189224, 'timestamp': '2025-09-30 22:36:54.885386', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:54.955956', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.10588584840297699, 'timestamp': '2025-09-30 22:36:54.959661', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:55.018769', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.11995545029640198, 'timestamp': '2025-09-30 22:36:55.023308', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.091835', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.037089016288518906, 'timestamp': '2025-09-30 22:36:55.099849', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:55.157580', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.09464170783758163, 'timestamp': '2025-09-30 22:36:55.169403', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:55.237119', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.06517847627401352, 'timestamp': '2025-09-30 22:36:55.245100', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.310958', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.24426928162574768, 'timestamp': '2025-09-30 22:36:55.313509', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:55.371374', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.1519991159439087, 'timestamp': '2025-09-30 22:36:55.378397', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.436118', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.046047087758779526, 'timestamp': '2025-09-30 22:36:55.440452', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:55.497463', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.08909069746732712, 'timestamp': '2025-09-30 22:36:55.500647', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:55.567419', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.13793502748012543, 'timestamp': '2025-09-30 22:36:55.575046', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.638334', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.12450289726257324, 'timestamp': '2025-09-30 22:36:55.644752', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:55.702117', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.1279112994670868, 'timestamp': '2025-09-30 22:36:55.709202', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:55.766317', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.1292032152414322, 'timestamp': '2025-09-30 22:36:55.769905', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.834994', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.13884027302265167, 'timestamp': '2025-09-30 22:36:55.837751', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:55.895783', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.1583191156387329, 'timestamp': '2025-09-30 22:36:55.903322', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:55.963544', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.12776371836662292, 'timestamp': '2025-09-30 22:36:55.974895', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:56.043107', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.14982344210147858, 'timestamp': '2025-09-30 22:36:56.048536', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:56.107987', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.07797779887914658, 'timestamp': '2025-09-30 22:36:56.127326', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:56.188345', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.07968135923147202, 'timestamp': '2025-09-30 22:36:56.195551', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:56.253063', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.14516620337963104, 'timestamp': '2025-09-30 22:36:56.256315', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.313682', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.25553837418556213, 'timestamp': '2025-09-30 22:36:56.316449', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:56.374076', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.048423491418361664, 'timestamp': '2025-09-30 22:36:56.376638', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.439146', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.031473200768232346, 'timestamp': '2025-09-30 22:36:56.446300', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.512297', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.07042673230171204, 'timestamp': '2025-09-30 22:36:56.516102', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.572614', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.09015877544879913, 'timestamp': '2025-09-30 22:36:56.576252', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:56.640690', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.12510016560554504, 'timestamp': '2025-09-30 22:36:56.646847', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.707043', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.05190179869532585, 'timestamp': '2025-09-30 22:36:56.713511', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:56.769613', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.17100520431995392, 'timestamp': '2025-09-30 22:36:56.775841', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.839568', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.13429845869541168, 'timestamp': '2025-09-30 22:36:56.842231', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:56.900139', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.05159303545951843, 'timestamp': '2025-09-30 22:36:56.903195', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:56.967363', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.13063888251781464, 'timestamp': '2025-09-30 22:36:56.973713', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:57.038710', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.0986102893948555, 'timestamp': '2025-09-30 22:36:57.045358', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:57.107077', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.07661550492048264, 'timestamp': '2025-09-30 22:36:57.118421', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:57.176227', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.11007563769817352, 'timestamp': '2025-09-30 22:36:57.182446', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:57.243062', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.04843902215361595, 'timestamp': '2025-09-30 22:36:57.249416', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:57.305663', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.09666716307401657, 'timestamp': '2025-09-30 22:36:57.309097', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:57.374209', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.09194901585578918, 'timestamp': '2025-09-30 22:36:57.382715', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:57.452193', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.04236948490142822, 'timestamp': '2025-09-30 22:36:57.454775', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:57.515341', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.07901967316865921, 'timestamp': '2025-09-30 22:36:57.521165', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:57.588742', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.1813005805015564, 'timestamp': '2025-09-30 22:36:57.591458', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:36:57.648871', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.04443535581231117, 'timestamp': '2025-09-30 22:36:57.653980', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:57.712626', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.15082821249961853, 'timestamp': '2025-09-30 22:36:57.719617', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:57.776891', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.12517473101615906, 'timestamp': '2025-09-30 22:36:57.783330', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:57.844066', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.05385212600231171, 'timestamp': '2025-09-30 22:36:57.851299', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:57.909563', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.09209589660167694, 'timestamp': '2025-09-30 22:36:57.912009', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:57.970690', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.06286786496639252, 'timestamp': '2025-09-30 22:36:57.976269', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:36:58.036434', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.09165728837251663, 'timestamp': '2025-09-30 22:36:58.044562', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.101250', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.07242387533187866, 'timestamp': '2025-09-30 22:36:58.103565', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.160664', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.15524619817733765, 'timestamp': '2025-09-30 22:36:58.163419', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:58.225730', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.12630420923233032, 'timestamp': '2025-09-30 22:36:58.231508', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:58.288279', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.06962151825428009, 'timestamp': '2025-09-30 22:36:58.294571', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.350709', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.06007551774382591, 'timestamp': '2025-09-30 22:36:58.354033', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:58.414804', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.16203686594963074, 'timestamp': '2025-09-30 22:36:58.419775', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:58.477760', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.12562184035778046, 'timestamp': '2025-09-30 22:36:58.482965', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.541550', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.0825619250535965, 'timestamp': '2025-09-30 22:36:58.547958', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.605912', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.11010073125362396, 'timestamp': '2025-09-30 22:36:58.608916', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:58.674452', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.13640861213207245, 'timestamp': '2025-09-30 22:36:58.677227', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:58.735504', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.07598207890987396, 'timestamp': '2025-09-30 22:36:58.738013', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:58.795381', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.11995422095060349, 'timestamp': '2025-09-30 22:36:58.802096', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:58.861857', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.057371415197849274, 'timestamp': '2025-09-30 22:36:58.864454', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:58.924256', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.05604787543416023, 'timestamp': '2025-09-30 22:36:58.928643', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:58.986351', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.1074349582195282, 'timestamp': '2025-09-30 22:36:58.994155', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.051888', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.0724480152130127, 'timestamp': '2025-09-30 22:36:59.059205', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.115659', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.12341978400945663, 'timestamp': '2025-09-30 22:36:59.118391', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:59.176073', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.11641893535852432, 'timestamp': '2025-09-30 22:36:59.178364', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:59.237574', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.14560699462890625, 'timestamp': '2025-09-30 22:36:59.240248', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:59.297669', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.06072608754038811, 'timestamp': '2025-09-30 22:36:59.321730', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.392715', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.020768264308571815, 'timestamp': '2025-09-30 22:36:59.396133', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.454810', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.10299775004386902, 'timestamp': '2025-09-30 22:36:59.458905', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:59.515529', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.11597128957509995, 'timestamp': '2025-09-30 22:36:59.518290', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:36:59.574945', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.14572618901729584, 'timestamp': '2025-09-30 22:36:59.581496', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.637527', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.1511135697364807, 'timestamp': '2025-09-30 22:36:59.643150', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.700197', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.1155996024608612, 'timestamp': '2025-09-30 22:36:59.708676', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.765476', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.06290298700332642, 'timestamp': '2025-09-30 22:36:59.767917', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:36:59.824968', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.14780190587043762, 'timestamp': '2025-09-30 22:36:59.831594', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:36:59.888647', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.05178403854370117, 'timestamp': '2025-09-30 22:36:59.891493', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:36:59.950736', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.07178187370300293, 'timestamp': '2025-09-30 22:36:59.953048', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:00.011512', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.06567589938640594, 'timestamp': '2025-09-30 22:37:00.015747', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:00.072744', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.09852620959281921, 'timestamp': '2025-09-30 22:37:00.081583', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:00.139651', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.149228036403656, 'timestamp': '2025-09-30 22:37:00.153951', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:00.210932', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.09926357865333557, 'timestamp': '2025-09-30 22:37:00.213541', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:00.274711', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.1346295326948166, 'timestamp': '2025-09-30 22:37:00.277558', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:00.336341', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.09335151314735413, 'timestamp': '2025-09-30 22:37:00.345642', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:00.401243', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.11249417811632156, 'timestamp': '2025-09-30 22:37:00.403983', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:00.461162', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.07507602125406265, 'timestamp': '2025-09-30 22:37:00.463895', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:00.520825', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.07234702259302139, 'timestamp': '2025-09-30 22:37:00.523031', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:00.583516', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.1283690333366394, 'timestamp': '2025-09-30 22:37:00.592043', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:00.650221', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.143269345164299, 'timestamp': '2025-09-30 22:37:00.653280', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:00.710588', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.09423955529928207, 'timestamp': '2025-09-30 22:37:00.713142', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:00.772569', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.19045476615428925, 'timestamp': '2025-09-30 22:37:00.775392', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:00.838437', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.1326640099287033, 'timestamp': '2025-09-30 22:37:00.844709', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:00.913867', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.09588993340730667, 'timestamp': '2025-09-30 22:37:00.916663', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:00.973502', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.06782302260398865, 'timestamp': '2025-09-30 22:37:00.976174', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.034251', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.10157571732997894, 'timestamp': '2025-09-30 22:37:01.037025', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.093388', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.09797963500022888, 'timestamp': '2025-09-30 22:37:01.101100', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.162192', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.0616634301841259, 'timestamp': '2025-09-30 22:37:01.173251', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:01.229744', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.07013996690511703, 'timestamp': '2025-09-30 22:37:01.232304', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:01.288913', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.07354345917701721, 'timestamp': '2025-09-30 22:37:01.291643', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:01.350291', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.049160026013851166, 'timestamp': '2025-09-30 22:37:01.356506', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.420240', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.1501779556274414, 'timestamp': '2025-09-30 22:37:01.423608', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:01.483480', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.08221597224473953, 'timestamp': '2025-09-30 22:37:01.486637', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:01.543995', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.18992778658866882, 'timestamp': '2025-09-30 22:37:01.546925', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.603787', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.1329789012670517, 'timestamp': '2025-09-30 22:37:01.610102', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:01.666592', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.22048014402389526, 'timestamp': '2025-09-30 22:37:01.669866', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.728573', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.058291394263505936, 'timestamp': '2025-09-30 22:37:01.734873', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.793117', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.14412803947925568, 'timestamp': '2025-09-30 22:37:01.796134', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:01.859237', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.09607630968093872, 'timestamp': '2025-09-30 22:37:01.865761', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:01.937803', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.11632830649614334, 'timestamp': '2025-09-30 22:37:01.940840', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:02.000421', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.12501895427703857, 'timestamp': '2025-09-30 22:37:02.003783', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.061675', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.056234270334243774, 'timestamp': '2025-09-30 22:37:02.065056', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:02.124649', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.10452603548765182, 'timestamp': '2025-09-30 22:37:02.130477', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.188698', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.10332536697387695, 'timestamp': '2025-09-30 22:37:02.192036', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:02.252789', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.08671081811189651, 'timestamp': '2025-09-30 22:37:02.256513', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:02.314387', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.1473785936832428, 'timestamp': '2025-09-30 22:37:02.316877', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.383437', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.09266587346792221, 'timestamp': '2025-09-30 22:37:02.391492', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.447598', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.005931338761001825, 'timestamp': '2025-09-30 22:37:02.451218', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:02.509259', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.09990609437227249, 'timestamp': '2025-09-30 22:37:02.512822', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:02.579053', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.0864955261349678, 'timestamp': '2025-09-30 22:37:02.582614', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.639746', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.09657714515924454, 'timestamp': '2025-09-30 22:37:02.647258', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:02.704455', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.09005560725927353, 'timestamp': '2025-09-30 22:37:02.708609', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:02.780936', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.1627073585987091, 'timestamp': '2025-09-30 22:37:02.783942', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:02.847115', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.046397458761930466, 'timestamp': '2025-09-30 22:37:02.850602', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:02.909754', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.09827876091003418, 'timestamp': '2025-09-30 22:37:02.915962', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:02.976093', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.051144663244485855, 'timestamp': '2025-09-30 22:37:02.986123', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.050764', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.09364574402570724, 'timestamp': '2025-09-30 22:37:03.055343', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:03.112933', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.24262464046478271, 'timestamp': '2025-09-30 22:37:03.127863', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:03.186769', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.08117706328630447, 'timestamp': '2025-09-30 22:37:03.194069', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.250049', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.1341799646615982, 'timestamp': '2025-09-30 22:37:03.253241', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.314982', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.13830731809139252, 'timestamp': '2025-09-30 22:37:03.327750', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:03.386409', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.09459589421749115, 'timestamp': '2025-09-30 22:37:03.391475', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.451144', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.16760359704494476, 'timestamp': '2025-09-30 22:37:03.462754', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:03.530336', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.11869505047798157, 'timestamp': '2025-09-30 22:37:03.541957', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.601124', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.12130403518676758, 'timestamp': '2025-09-30 22:37:03.614769', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.678630', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.04835560917854309, 'timestamp': '2025-09-30 22:37:03.682172', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:03.739167', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.11686934530735016, 'timestamp': '2025-09-30 22:37:03.747461', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:03.807453', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.08580417931079865, 'timestamp': '2025-09-30 22:37:03.811996', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:03.868726', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.20040416717529297, 'timestamp': '2025-09-30 22:37:03.874587', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:03.933528', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.08060672879219055, 'timestamp': '2025-09-30 22:37:03.937463', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:03.996617', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.1506866067647934, 'timestamp': '2025-09-30 22:37:04.017241', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:04.076421', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.09463493525981903, 'timestamp': '2025-09-30 22:37:04.090292', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:04.159108', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.13001380860805511, 'timestamp': '2025-09-30 22:37:04.163756', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:04.220789', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.06970500200986862, 'timestamp': '2025-09-30 22:37:04.232705', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:04.290366', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.04318268969655037, 'timestamp': '2025-09-30 22:37:04.297179', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:04.354397', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.044615983963012695, 'timestamp': '2025-09-30 22:37:04.364441', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:04.429747', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.14235727488994598, 'timestamp': '2025-09-30 22:37:04.433858', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:04.503097', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.11490420252084732, 'timestamp': '2025-09-30 22:37:04.506215', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:04.566471', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.13207212090492249, 'timestamp': '2025-09-30 22:37:04.586012', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:04.645229', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.09163744747638702, 'timestamp': '2025-09-30 22:37:04.656463', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:04.715100', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.2350829392671585, 'timestamp': '2025-09-30 22:37:04.722659', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:04.780027', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.13150165975093842, 'timestamp': '2025-09-30 22:37:04.793650', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:04.856270', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.16688944399356842, 'timestamp': '2025-09-30 22:37:04.863456', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:04.920918', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.2191186398267746, 'timestamp': '2025-09-30 22:37:04.924341', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:04.985160', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.03776485100388527, 'timestamp': '2025-09-30 22:37:04.987862', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:05.054986', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.10199528932571411, 'timestamp': '2025-09-30 22:37:05.059586', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:05.118876', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.06875519454479218, 'timestamp': '2025-09-30 22:37:05.126540', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:05.183154', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.1096377745270729, 'timestamp': '2025-09-30 22:37:05.187251', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:05.247521', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.116111621260643, 'timestamp': '2025-09-30 22:37:05.253115', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:05.312283', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.08786383271217346, 'timestamp': '2025-09-30 22:37:05.317296', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:05.384881', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.09238865971565247, 'timestamp': '2025-09-30 22:37:05.392464', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:05.452755', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.06629611551761627, 'timestamp': '2025-09-30 22:37:05.456406', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:05.513542', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.07948489487171173, 'timestamp': '2025-09-30 22:37:05.519479', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:05.577114', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.14829184114933014, 'timestamp': '2025-09-30 22:37:05.582047', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:05.640851', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.20562785863876343, 'timestamp': '2025-09-30 22:37:05.647549', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:05.711308', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.0762716606259346, 'timestamp': '2025-09-30 22:37:05.715471', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:05.792414', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.11073064804077148, 'timestamp': '2025-09-30 22:37:05.797294', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:05.856601', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.05623818188905716, 'timestamp': '2025-09-30 22:37:05.862157', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:05.920712', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.10311529785394669, 'timestamp': '2025-09-30 22:37:05.929199', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:05.987266', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.11785085499286652, 'timestamp': '2025-09-30 22:37:05.992943', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:06.053385', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.12865543365478516, 'timestamp': '2025-09-30 22:37:06.057860', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:06.150304', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.09278921782970428, 'timestamp': '2025-09-30 22:37:06.153640', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:06.220586', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.08066528290510178, 'timestamp': '2025-09-30 22:37:06.229556', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:06.295253', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.10014774650335312, 'timestamp': '2025-09-30 22:37:06.298945', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:06.358063', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.12422353029251099, 'timestamp': '2025-09-30 22:37:06.361745', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:06.430459', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.14899808168411255, 'timestamp': '2025-09-30 22:37:06.444247', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:06.505828', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.08501740545034409, 'timestamp': '2025-09-30 22:37:06.522992', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:06.580941', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.12702034413814545, 'timestamp': '2025-09-30 22:37:06.585580', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:06.662857', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.05670257657766342, 'timestamp': '2025-09-30 22:37:06.673006', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:37:06.745303', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.21353796124458313, 'timestamp': '2025-09-30 22:37:06.748101', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:06.827391', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.10769437998533249, 'timestamp': '2025-09-30 22:37:06.833856', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:06.915104', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.16390372812747955, 'timestamp': '2025-09-30 22:37:06.918479', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:06.984883', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.11797938495874405, 'timestamp': '2025-09-30 22:37:06.988002', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:07.058116', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.16450469195842743, 'timestamp': '2025-09-30 22:37:07.060523', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:07.137444', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.10548048466444016, 'timestamp': '2025-09-30 22:37:07.145733', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:07.231392', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.1399083137512207, 'timestamp': '2025-09-30 22:37:07.235866', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:07.324815', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.08891381323337555, 'timestamp': '2025-09-30 22:37:07.328191', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:07.397873', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.09454765915870667, 'timestamp': '2025-09-30 22:37:07.401694', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:07.484013', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.16458052396774292, 'timestamp': '2025-09-30 22:37:07.490772', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:37:21.634385', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 9025.171823365372, 'timestamp': '2025-09-30 22:37:21.638104', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:21.697112', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.07337413728237152, 'timestamp': '2025-09-30 22:37:21.708632', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:21.769989', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.18597359955310822, 'timestamp': '2025-09-30 22:37:21.772304', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:21.829845', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.16012701392173767, 'timestamp': '2025-09-30 22:37:21.832480', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:21.890572', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.10511156171560287, 'timestamp': '2025-09-30 22:37:21.897092', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:21.954195', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.09419839084148407, 'timestamp': '2025-09-30 22:37:21.961808', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:22.027666', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.06834959983825684, 'timestamp': '2025-09-30 22:37:22.029829', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.087465', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.09678767621517181, 'timestamp': '2025-09-30 22:37:22.090025', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:22.160453', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.1771073192358017, 'timestamp': '2025-09-30 22:37:22.167391', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.224877', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.10052409768104553, 'timestamp': '2025-09-30 22:37:22.232084', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:22.292569', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.06437209993600845, 'timestamp': '2025-09-30 22:37:22.295034', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:22.358587', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.12341875582933426, 'timestamp': '2025-09-30 22:37:22.363943', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:22.420581', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.06278256326913834, 'timestamp': '2025-09-30 22:37:22.427291', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:22.483545', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.1109805554151535, 'timestamp': '2025-09-30 22:37:22.488129', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:22.555068', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.14578670263290405, 'timestamp': '2025-09-30 22:37:22.560985', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:22.622738', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.13305112719535828, 'timestamp': '2025-09-30 22:37:22.625736', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.682955', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.0985797867178917, 'timestamp': '2025-09-30 22:37:22.689395', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.746539', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.11561404913663864, 'timestamp': '2025-09-30 22:37:22.749046', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.809338', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.09639342129230499, 'timestamp': '2025-09-30 22:37:22.815744', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:22.881186', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.14152520895004272, 'timestamp': '2025-09-30 22:37:22.884223', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:22.941348', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.0721408873796463, 'timestamp': '2025-09-30 22:37:22.948665', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:23.010209', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.08958050608634949, 'timestamp': '2025-09-30 22:37:23.015416', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:23.073927', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.05701176077127457, 'timestamp': '2025-09-30 22:37:23.080006', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.141824', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.11479426920413971, 'timestamp': '2025-09-30 22:37:23.144727', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:23.201753', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.1301676332950592, 'timestamp': '2025-09-30 22:37:23.207990', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:23.264516', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.12265655398368835, 'timestamp': '2025-09-30 22:37:23.267670', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.334767', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.11298944056034088, 'timestamp': '2025-09-30 22:37:23.338650', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:23.396559', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.10820505023002625, 'timestamp': '2025-09-30 22:37:23.402746', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:23.465683', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.10264042764902115, 'timestamp': '2025-09-30 22:37:23.472532', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.541928', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.0813378244638443, 'timestamp': '2025-09-30 22:37:23.548047', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:23.610998', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.03180370479822159, 'timestamp': '2025-09-30 22:37:23.614163', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.672741', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.11773673444986343, 'timestamp': '2025-09-30 22:37:23.675758', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.737392', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.08413652330636978, 'timestamp': '2025-09-30 22:37:23.752799', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.810203', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.11681854724884033, 'timestamp': '2025-09-30 22:37:23.816476', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:23.873830', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.104576475918293, 'timestamp': '2025-09-30 22:37:23.876724', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:23.935836', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.1607840359210968, 'timestamp': '2025-09-30 22:37:23.939543', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.014310', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.10984321683645248, 'timestamp': '2025-09-30 22:37:24.021892', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.079142', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.09208367019891739, 'timestamp': '2025-09-30 22:37:24.085347', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:24.144171', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.08575746417045593, 'timestamp': '2025-09-30 22:37:24.148901', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.211674', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.15154002606868744, 'timestamp': '2025-09-30 22:37:24.216767', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.275221', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.11218396574258804, 'timestamp': '2025-09-30 22:37:24.290324', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:24.348903', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.09137104451656342, 'timestamp': '2025-09-30 22:37:24.355044', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.413216', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.05550963059067726, 'timestamp': '2025-09-30 22:37:24.422858', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:24.481962', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.08085055649280548, 'timestamp': '2025-09-30 22:37:24.487253', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:24.545904', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.11507031321525574, 'timestamp': '2025-09-30 22:37:24.553371', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.619091', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.067170150578022, 'timestamp': '2025-09-30 22:37:24.622049', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:24.680497', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.09615837037563324, 'timestamp': '2025-09-30 22:37:24.684924', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:24.742657', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.09837168455123901, 'timestamp': '2025-09-30 22:37:24.747306', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:24.806122', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.19141623377799988, 'timestamp': '2025-09-30 22:37:24.813658', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:24.870596', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.05909468233585358, 'timestamp': '2025-09-30 22:37:24.874404', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:24.931726', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.05401482433080673, 'timestamp': '2025-09-30 22:37:24.935647', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:24.992473', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.13362649083137512, 'timestamp': '2025-09-30 22:37:24.996633', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:25.054451', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.11964184790849686, 'timestamp': '2025-09-30 22:37:25.066454', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:25.123443', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.08913073688745499, 'timestamp': '2025-09-30 22:37:25.127079', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.190438', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.13663539290428162, 'timestamp': '2025-09-30 22:37:25.194135', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:25.259747', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.09074407815933228, 'timestamp': '2025-09-30 22:37:25.269749', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.330285', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.07637401670217514, 'timestamp': '2025-09-30 22:37:25.338381', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.394841', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.09409783035516739, 'timestamp': '2025-09-30 22:37:25.399018', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:25.457574', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.08352496474981308, 'timestamp': '2025-09-30 22:37:25.462595', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.520004', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.04168565943837166, 'timestamp': '2025-09-30 22:37:25.523600', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.581564', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.09277528524398804, 'timestamp': '2025-09-30 22:37:25.588491', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:25.645231', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.10272950679063797, 'timestamp': '2025-09-30 22:37:25.649606', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:25.706951', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.05649742856621742, 'timestamp': '2025-09-30 22:37:25.711763', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:25.770689', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.07832764834165573, 'timestamp': '2025-09-30 22:37:25.782166', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:25.841155', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.08511235564947128, 'timestamp': '2025-09-30 22:37:25.847830', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:25.906749', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.11921167373657227, 'timestamp': '2025-09-30 22:37:25.911304', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:25.969564', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.07285433262586594, 'timestamp': '2025-09-30 22:37:25.972440', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:26.034706', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.04657524824142456, 'timestamp': '2025-09-30 22:37:26.039203', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:26.104127', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.14305715262889862, 'timestamp': '2025-09-30 22:37:26.112547', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.172708', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.08044560998678207, 'timestamp': '2025-09-30 22:37:26.177416', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:26.237975', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.09503956884145737, 'timestamp': '2025-09-30 22:37:26.248957', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:26.307315', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.0779871791601181, 'timestamp': '2025-09-30 22:37:26.322649', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:26.381928', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.10496315360069275, 'timestamp': '2025-09-30 22:37:26.395675', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.453599', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.11346166580915451, 'timestamp': '2025-09-30 22:37:26.456775', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.520954', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.11766639351844788, 'timestamp': '2025-09-30 22:37:26.524822', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:26.590253', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.11295855790376663, 'timestamp': '2025-09-30 22:37:26.602171', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:26.668028', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.08732577413320541, 'timestamp': '2025-09-30 22:37:26.684058', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.742998', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.08116370439529419, 'timestamp': '2025-09-30 22:37:26.746442', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.803359', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.1349906623363495, 'timestamp': '2025-09-30 22:37:26.807509', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:26.864732', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.19058486819267273, 'timestamp': '2025-09-30 22:37:26.873974', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:26.935309', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.06755764782428741, 'timestamp': '2025-09-30 22:37:26.942617', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.009300', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.08112899214029312, 'timestamp': '2025-09-30 22:37:27.019013', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.078656', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.08339006453752518, 'timestamp': '2025-09-30 22:37:27.088782', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:27.147230', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.10167162865400314, 'timestamp': '2025-09-30 22:37:27.152074', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.211661', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.07317426055669785, 'timestamp': '2025-09-30 22:37:27.219607', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:27.281752', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.1121407076716423, 'timestamp': '2025-09-30 22:37:27.285907', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:27.352521', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.06420177966356277, 'timestamp': '2025-09-30 22:37:27.359435', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:27.419261', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.18716996908187866, 'timestamp': '2025-09-30 22:37:27.425130', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:27.485777', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.10165784507989883, 'timestamp': '2025-09-30 22:37:27.493941', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:27.552664', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.11532459408044815, 'timestamp': '2025-09-30 22:37:27.557110', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.621481', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.08014228194952011, 'timestamp': '2025-09-30 22:37:27.626084', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.686083', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.04744201526045799, 'timestamp': '2025-09-30 22:37:27.690897', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:27.748515', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.1272815316915512, 'timestamp': '2025-09-30 22:37:27.764901', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:27.825183', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.06270012259483337, 'timestamp': '2025-09-30 22:37:27.830446', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:27.889852', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.08466117084026337, 'timestamp': '2025-09-30 22:37:27.898734', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:27.958453', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.05451416224241257, 'timestamp': '2025-09-30 22:37:27.964454', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:28.024597', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.12661299109458923, 'timestamp': '2025-09-30 22:37:28.034059', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:28.092867', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.09730776399374008, 'timestamp': '2025-09-30 22:37:28.101344', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.163770', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.1814431995153427, 'timestamp': '2025-09-30 22:37:28.169021', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:28.227342', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.12069110572338104, 'timestamp': '2025-09-30 22:37:28.231428', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.289019', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.06831071525812149, 'timestamp': '2025-09-30 22:37:28.300162', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:28.366996', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.07476093620061874, 'timestamp': '2025-09-30 22:37:28.369798', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:28.435159', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.10235704481601715, 'timestamp': '2025-09-30 22:37:28.439058', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.496253', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.06120094656944275, 'timestamp': '2025-09-30 22:37:28.504161', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.560894', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.19390320777893066, 'timestamp': '2025-09-30 22:37:28.567325', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.622738', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.15515345335006714, 'timestamp': '2025-09-30 22:37:28.626641', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:28.685389', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.052151668816804886, 'timestamp': '2025-09-30 22:37:28.688306', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:28.745893', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.25159046053886414, 'timestamp': '2025-09-30 22:37:28.748330', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.813219', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.05415612831711769, 'timestamp': '2025-09-30 22:37:28.820990', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:28.877433', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.16344380378723145, 'timestamp': '2025-09-30 22:37:28.880198', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:28.944910', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.09764311462640762, 'timestamp': '2025-09-30 22:37:28.947470', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:29.012233', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.1835414469242096, 'timestamp': '2025-09-30 22:37:29.015262', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:29.072277', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.06761360168457031, 'timestamp': '2025-09-30 22:37:29.082508', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:29.145407', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.08564087748527527, 'timestamp': '2025-09-30 22:37:29.154421', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:29.212379', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.059823814779520035, 'timestamp': '2025-09-30 22:37:29.217384', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:29.300281', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.08673229068517685, 'timestamp': '2025-09-30 22:37:29.319850', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:29.380978', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.09344418346881866, 'timestamp': '2025-09-30 22:37:29.390774', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:29.468693', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.1666729748249054, 'timestamp': '2025-09-30 22:37:29.474000', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:29.544647', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.12997089326381683, 'timestamp': '2025-09-30 22:37:29.548979', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:29.608424', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.0773274376988411, 'timestamp': '2025-09-30 22:37:29.614187', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:29.673243', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.1631915420293808, 'timestamp': '2025-09-30 22:37:29.690809', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:29.749671', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.1367291659116745, 'timestamp': '2025-09-30 22:37:29.759391', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:29.821454', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.042849812656641006, 'timestamp': '2025-09-30 22:37:29.825998', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:29.887156', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.15335789322853088, 'timestamp': '2025-09-30 22:37:29.892570', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:29.948419', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.09773414582014084, 'timestamp': '2025-09-30 22:37:29.954698', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.012145', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.07956663519144058, 'timestamp': '2025-09-30 22:37:30.017208', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:30.073686', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.07186442613601685, 'timestamp': '2025-09-30 22:37:30.080148', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.138923', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.07868634909391403, 'timestamp': '2025-09-30 22:37:30.142997', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:30.198958', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.07877933233976364, 'timestamp': '2025-09-30 22:37:30.204692', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.262611', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.09443196654319763, 'timestamp': '2025-09-30 22:37:30.269513', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:30.331644', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.0798405334353447, 'timestamp': '2025-09-30 22:37:30.333807', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:30.395406', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.07584980875253677, 'timestamp': '2025-09-30 22:37:30.399936', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:30.458202', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.1706879436969757, 'timestamp': '2025-09-30 22:37:30.468552', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:30.528028', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.20119839906692505, 'timestamp': '2025-09-30 22:37:30.535607', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:30.596542', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.054991915822029114, 'timestamp': '2025-09-30 22:37:30.604054', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.664540', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.07223846018314362, 'timestamp': '2025-09-30 22:37:30.666911', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:30.730791', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.11706431210041046, 'timestamp': '2025-09-30 22:37:30.737689', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.794282', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.20994728803634644, 'timestamp': '2025-09-30 22:37:30.796741', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.853691', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.09747903048992157, 'timestamp': '2025-09-30 22:37:30.857024', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:30.920221', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.13141633570194244, 'timestamp': '2025-09-30 22:37:30.924271', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:30.981428', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.09291165322065353, 'timestamp': '2025-09-30 22:37:30.988557', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.046129', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.07358202338218689, 'timestamp': '2025-09-30 22:37:31.051083', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.114174', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.1116502434015274, 'timestamp': '2025-09-30 22:37:31.118128', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:31.175405', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.10819941014051437, 'timestamp': '2025-09-30 22:37:31.178896', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:31.237199', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.09802636504173279, 'timestamp': '2025-09-30 22:37:31.245150', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:31.309212', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.11128752678632736, 'timestamp': '2025-09-30 22:37:31.312180', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:31.370873', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.13483598828315735, 'timestamp': '2025-09-30 22:37:31.374148', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.431577', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08484046161174774, 'timestamp': '2025-09-30 22:37:31.435533', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:31.493503', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.07405594736337662, 'timestamp': '2025-09-30 22:37:31.501731', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.564328', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.12312844395637512, 'timestamp': '2025-09-30 22:37:31.576917', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.634334', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.12101335823535919, 'timestamp': '2025-09-30 22:37:31.638062', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:31.696914', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.10608737170696259, 'timestamp': '2025-09-30 22:37:31.706021', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.763715', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.07512360066175461, 'timestamp': '2025-09-30 22:37:31.770491', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.827453', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.1353551149368286, 'timestamp': '2025-09-30 22:37:31.830965', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:31.892022', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.06786542385816574, 'timestamp': '2025-09-30 22:37:31.905230', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:31.962795', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.036910027265548706, 'timestamp': '2025-09-30 22:37:31.968905', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:32.032283', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.18496400117874146, 'timestamp': '2025-09-30 22:37:32.041667', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-09-30 22:37:32.517096', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:32.579011', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.07086369395256042, 'timestamp': '2025-09-30 22:37:32.590875', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:32.649587', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.03470267727971077, 'timestamp': '2025-09-30 22:37:32.653418', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:32.711391', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.1901610940694809, 'timestamp': '2025-09-30 22:37:32.714759', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:32.771426', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.0361885130405426, 'timestamp': '2025-09-30 22:37:32.779128', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:32.837034', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.11294223368167877, 'timestamp': '2025-09-30 22:37:32.840826', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:32.897479', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.06728032231330872, 'timestamp': '2025-09-30 22:37:32.907717', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:32.976481', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.1456139236688614, 'timestamp': '2025-09-30 22:37:32.984498', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:33.048291', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.07635114341974258, 'timestamp': '2025-09-30 22:37:33.056917', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:33.123905', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.06875217705965042, 'timestamp': '2025-09-30 22:37:33.126510', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:33.185207', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.12060954421758652, 'timestamp': '2025-09-30 22:37:33.189098', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:33.248342', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.060280777513980865, 'timestamp': '2025-09-30 22:37:33.251924', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:33.311230', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.0703912302851677, 'timestamp': '2025-09-30 22:37:33.319152', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:33.378050', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.13025064766407013, 'timestamp': '2025-09-30 22:37:33.383535', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:33.450186', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.06412974745035172, 'timestamp': '2025-09-30 22:37:33.455232', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:33.513771', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.0982663631439209, 'timestamp': '2025-09-30 22:37:33.516680', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:33.581225', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.07759391516447067, 'timestamp': '2025-09-30 22:37:33.595388', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:33.652992', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.15045063197612762, 'timestamp': '2025-09-30 22:37:33.657038', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:33.715995', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.15058258175849915, 'timestamp': '2025-09-30 22:37:33.720059', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:33.778616', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.10838110744953156, 'timestamp': '2025-09-30 22:37:33.782140', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:33.841332', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.06055711582303047, 'timestamp': '2025-09-30 22:37:33.858844', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:33.917652', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.057174988090991974, 'timestamp': '2025-09-30 22:37:33.921454', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:33.979028', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.056041695177555084, 'timestamp': '2025-09-30 22:37:33.981936', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:34.039416', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.1590627282857895, 'timestamp': '2025-09-30 22:37:34.044221', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:34.102560', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.07034183293581009, 'timestamp': '2025-09-30 22:37:34.113834', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:34.172855', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.08806530386209488, 'timestamp': '2025-09-30 22:37:34.176201', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:34.234730', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.10145179182291031, 'timestamp': '2025-09-30 22:37:34.243649', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:34.301191', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.11612185835838318, 'timestamp': '2025-09-30 22:37:34.304512', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:34.364669', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.14399373531341553, 'timestamp': '2025-09-30 22:37:34.372803', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:34.433644', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.059149451553821564, 'timestamp': '2025-09-30 22:37:34.443024', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:34.502052', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.07682766765356064, 'timestamp': '2025-09-30 22:37:34.505536', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:34.564953', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.11594779044389725, 'timestamp': '2025-09-30 22:37:34.568169', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:34.626877', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.07593829929828644, 'timestamp': '2025-09-30 22:37:34.633736', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:34.690902', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.1234130859375, 'timestamp': '2025-09-30 22:37:34.696042', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:34.760267', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.1275598704814911, 'timestamp': '2025-09-30 22:37:34.764636', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:34.822744', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.048532456159591675, 'timestamp': '2025-09-30 22:37:34.834199', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:34.891468', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.08547565340995789, 'timestamp': '2025-09-30 22:37:34.898875', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:34.956270', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.06736826151609421, 'timestamp': '2025-09-30 22:37:34.959763', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.025303', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.15760797262191772, 'timestamp': '2025-09-30 22:37:35.028181', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:35.085869', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.13638287782669067, 'timestamp': '2025-09-30 22:37:35.089786', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.146845', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.06889601051807404, 'timestamp': '2025-09-30 22:37:35.153570', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:35.211277', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.14809055626392365, 'timestamp': '2025-09-30 22:37:35.215539', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:35.274371', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.09549666941165924, 'timestamp': '2025-09-30 22:37:35.277769', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:35.336398', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.09792599081993103, 'timestamp': '2025-09-30 22:37:35.339856', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:35.397548', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.15506796538829803, 'timestamp': '2025-09-30 22:37:35.405058', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:35.462187', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.07942482084035873, 'timestamp': '2025-09-30 22:37:35.465330', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.522322', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.09289968013763428, 'timestamp': '2025-09-30 22:37:35.526705', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:35.583851', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.10992129147052765, 'timestamp': '2025-09-30 22:37:35.587473', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.658729', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.1031857430934906, 'timestamp': '2025-09-30 22:37:35.667407', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.732911', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.09338105469942093, 'timestamp': '2025-09-30 22:37:35.737897', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:35.808300', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.11988115310668945, 'timestamp': '2025-09-30 22:37:35.825692', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:35.887059', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.11218582838773727, 'timestamp': '2025-09-30 22:37:35.892156', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:35.962933', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.12705722451210022, 'timestamp': '2025-09-30 22:37:35.970674', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:36.038426', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.1755487471818924, 'timestamp': '2025-09-30 22:37:36.043013', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:36.106741', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.0552733950316906, 'timestamp': '2025-09-30 22:37:36.112250', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:36.177164', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.11114996671676636, 'timestamp': '2025-09-30 22:37:36.183000', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:36.245482', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.20423784852027893, 'timestamp': '2025-09-30 22:37:36.261430', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:36.330178', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.06813836842775345, 'timestamp': '2025-09-30 22:37:36.341163', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:36.411659', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.14018306136131287, 'timestamp': '2025-09-30 22:37:36.417642', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:36.476743', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.09076699614524841, 'timestamp': '2025-09-30 22:37:36.481636', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:36.542395', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.05239812657237053, 'timestamp': '2025-09-30 22:37:36.551447', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:36.611335', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.07620587199926376, 'timestamp': '2025-09-30 22:37:36.627591', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:36.690382', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.10300202667713165, 'timestamp': '2025-09-30 22:37:36.695183', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:36.762107', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.08606167137622833, 'timestamp': '2025-09-30 22:37:36.766182', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:36.853062', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.1083294004201889, 'timestamp': '2025-09-30 22:37:36.875180', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:36.943851', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.10226647555828094, 'timestamp': '2025-09-30 22:37:36.952879', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.013569', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.07249010354280472, 'timestamp': '2025-09-30 22:37:37.018690', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.081139', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.08428704738616943, 'timestamp': '2025-09-30 22:37:37.087875', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:37.150018', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.07191704213619232, 'timestamp': '2025-09-30 22:37:37.156693', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:37.230322', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.18210214376449585, 'timestamp': '2025-09-30 22:37:37.235156', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.307526', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.08537299931049347, 'timestamp': '2025-09-30 22:37:37.313270', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:37.384688', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.09054968506097794, 'timestamp': '2025-09-30 22:37:37.389068', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:37.450012', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.07331251353025436, 'timestamp': '2025-09-30 22:37:37.458892', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.517387', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.09156788885593414, 'timestamp': '2025-09-30 22:37:37.521507', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.585937', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.10103439539670944, 'timestamp': '2025-09-30 22:37:37.589534', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:37.649307', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.07802829891443253, 'timestamp': '2025-09-30 22:37:37.652442', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:37:37.709862', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.10942693799734116, 'timestamp': '2025-09-30 22:37:37.717575', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:37.786052', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.11145493388175964, 'timestamp': '2025-09-30 22:37:37.798619', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:37.861536', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.12357810139656067, 'timestamp': '2025-09-30 22:37:37.877334', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:37.955857', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.05354353040456772, 'timestamp': '2025-09-30 22:37:37.960392', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:38.038743', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.05593444034457207, 'timestamp': '2025-09-30 22:37:38.047874', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:38.106444', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.11674351245164871, 'timestamp': '2025-09-30 22:37:38.109492', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:38.168330', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.1536659598350525, 'timestamp': '2025-09-30 22:37:38.171998', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:38.230892', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.05674222111701965, 'timestamp': '2025-09-30 22:37:38.234847', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:38.303846', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.03807554394006729, 'timestamp': '2025-09-30 22:37:38.311018', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:38.389106', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.0760321095585823, 'timestamp': '2025-09-30 22:37:38.395252', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:38.460136', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.10407114773988724, 'timestamp': '2025-09-30 22:37:38.463981', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:38.523299', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.10106579959392548, 'timestamp': '2025-09-30 22:37:38.527522', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:38.590601', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.05268339440226555, 'timestamp': '2025-09-30 22:37:38.599198', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:38.658667', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.05571348965167999, 'timestamp': '2025-09-30 22:37:38.663119', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:38.724057', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.14205878973007202, 'timestamp': '2025-09-30 22:37:38.727607', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:38.786608', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.10156241804361343, 'timestamp': '2025-09-30 22:37:38.792192', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:38.855897', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.11231667548418045, 'timestamp': '2025-09-30 22:37:38.863894', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:38.938088', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.06893135607242584, 'timestamp': '2025-09-30 22:37:38.944259', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:39.012403', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.14864762127399445, 'timestamp': '2025-09-30 22:37:39.022129', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:39.102907', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.10283655673265457, 'timestamp': '2025-09-30 22:37:39.107087', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:39.174326', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.1586015373468399, 'timestamp': '2025-09-30 22:37:39.181881', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:39.240396', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.08207423239946365, 'timestamp': '2025-09-30 22:37:39.244187', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:39.307625', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.12253677099943161, 'timestamp': '2025-09-30 22:37:39.314176', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:39.372995', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.10997278988361359, 'timestamp': '2025-09-30 22:37:39.386159', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:39.447117', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.15146301686763763, 'timestamp': '2025-09-30 22:37:39.464581', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:39.521247', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.09197515994310379, 'timestamp': '2025-09-30 22:37:39.526105', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:39.598082', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.07490471750497818, 'timestamp': '2025-09-30 22:37:39.603935', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:39.665310', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.14498218894004822, 'timestamp': '2025-09-30 22:37:39.670024', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:39.735993', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.07468848675489426, 'timestamp': '2025-09-30 22:37:39.750958', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:39.812714', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.05477561429142952, 'timestamp': '2025-09-30 22:37:39.817737', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:39.883241', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.060647912323474884, 'timestamp': '2025-09-30 22:37:39.887466', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:39.945916', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.15707561373710632, 'timestamp': '2025-09-30 22:37:39.950739', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.011335', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.13876402378082275, 'timestamp': '2025-09-30 22:37:40.019367', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.079125', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.04062746837735176, 'timestamp': '2025-09-30 22:37:40.095183', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:40.159219', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.10041197389364243, 'timestamp': '2025-09-30 22:37:40.165104', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:40.224501', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.12910576164722443, 'timestamp': '2025-09-30 22:37:40.228885', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:40.300928', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.05574866756796837, 'timestamp': '2025-09-30 22:37:40.309030', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.375457', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.1380956470966339, 'timestamp': '2025-09-30 22:37:40.380337', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.469268', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.11998150497674942, 'timestamp': '2025-09-30 22:37:40.488791', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.552603', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.08835331350564957, 'timestamp': '2025-09-30 22:37:40.556015', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.627431', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.08289521187543869, 'timestamp': '2025-09-30 22:37:40.638071', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.699884', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.09328778088092804, 'timestamp': '2025-09-30 22:37:40.708641', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.771200', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.12816008925437927, 'timestamp': '2025-09-30 22:37:40.777919', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.849325', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.05582336336374283, 'timestamp': '2025-09-30 22:37:40.856662', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:40.920957', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.0668858140707016, 'timestamp': '2025-09-30 22:37:40.929410', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:40.990307', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.027097364887595177, 'timestamp': '2025-09-30 22:37:41.002468', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:41.066381', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.053102489560842514, 'timestamp': '2025-09-30 22:37:41.072855', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:41.138836', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.09208749979734421, 'timestamp': '2025-09-30 22:37:41.145525', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:41.208101', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.08913548290729523, 'timestamp': '2025-09-30 22:37:41.215557', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:41.279896', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.08482067286968231, 'timestamp': '2025-09-30 22:37:41.284871', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:41.345910', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.16731303930282593, 'timestamp': '2025-09-30 22:37:41.350635', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:41.425253', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.08383756130933762, 'timestamp': '2025-09-30 22:37:41.435126', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:41.496515', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.06234263628721237, 'timestamp': '2025-09-30 22:37:41.506232', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:41.576012', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.06325604021549225, 'timestamp': '2025-09-30 22:37:41.589439', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:41.662116', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.15908168256282806, 'timestamp': '2025-09-30 22:37:41.666745', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:41.727932', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.05262685567140579, 'timestamp': '2025-09-30 22:37:41.731906', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:37:41.794521', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.1376393735408783, 'timestamp': '2025-09-30 22:37:41.802448', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:41.865320', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.07482301443815231, 'timestamp': '2025-09-30 22:37:41.871368', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:41.944415', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.0686132088303566, 'timestamp': '2025-09-30 22:37:41.947572', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.007543', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.10686156153678894, 'timestamp': '2025-09-30 22:37:42.011525', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.069140', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.08916769921779633, 'timestamp': '2025-09-30 22:37:42.085117', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.154360', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.08774507790803909, 'timestamp': '2025-09-30 22:37:42.164183', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.227409', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.0818503350019455, 'timestamp': '2025-09-30 22:37:42.235748', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:42.300065', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.12915171682834625, 'timestamp': '2025-09-30 22:37:42.305022', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:42.367337', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.08133117854595184, 'timestamp': '2025-09-30 22:37:42.374401', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:42.432897', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.1258058398962021, 'timestamp': '2025-09-30 22:37:42.436476', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:42.494575', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.053047917783260345, 'timestamp': '2025-09-30 22:37:42.498970', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.562925', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.08356351405382156, 'timestamp': '2025-09-30 22:37:42.575599', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:42.636164', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.06001608818769455, 'timestamp': '2025-09-30 22:37:42.643987', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.700414', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.10645852982997894, 'timestamp': '2025-09-30 22:37:42.705157', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.765240', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.17684021592140198, 'timestamp': '2025-09-30 22:37:42.777394', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:42.836205', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.10122939199209213, 'timestamp': '2025-09-30 22:37:42.839807', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:42.899239', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.1274167150259018, 'timestamp': '2025-09-30 22:37:42.917288', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:42.977251', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.06310004740953445, 'timestamp': '2025-09-30 22:37:42.980948', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:43.040617', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.14039185643196106, 'timestamp': '2025-09-30 22:37:43.043612', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:43.101123', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.17935512959957123, 'timestamp': '2025-09-30 22:37:43.116730', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:43.184661', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.10528569668531418, 'timestamp': '2025-09-30 22:37:43.198550', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:43.263179', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.10604254901409149, 'timestamp': '2025-09-30 22:37:43.266886', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:43.330739', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.13273490965366364, 'timestamp': '2025-09-30 22:37:43.340751', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:43.407017', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.06754768639802933, 'timestamp': '2025-09-30 22:37:43.409639', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:43.467837', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.09321123361587524, 'timestamp': '2025-09-30 22:37:43.475800', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:43.532395', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.1759757548570633, 'timestamp': '2025-09-30 22:37:43.538129', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 5440033091648.0}, 'timestamp': '2025-09-30 22:37:43.599368', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.21482768654823303, 'timestamp': '2025-09-30 22:37:43.602852', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:43.674998', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.0552741102874279, 'timestamp': '2025-09-30 22:37:43.680281', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:43.740465', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.1345142126083374, 'timestamp': '2025-09-30 22:37:43.749980', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:43.808552', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.1768123060464859, 'timestamp': '2025-09-30 22:37:43.813852', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:43.890630', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.13354115188121796, 'timestamp': '2025-09-30 22:37:43.899391', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:43.962472', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.08764160424470901, 'timestamp': '2025-09-30 22:37:43.965920', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.023514', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.08414552360773087, 'timestamp': '2025-09-30 22:37:44.036243', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.095854', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.08329812437295914, 'timestamp': '2025-09-30 22:37:44.100353', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:44.159993', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.07260014861822128, 'timestamp': '2025-09-30 22:37:44.163023', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:44.221766', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.06356184929609299, 'timestamp': '2025-09-30 22:37:44.225255', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.295601', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.05769963189959526, 'timestamp': '2025-09-30 22:37:44.308094', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.381617', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.10936200618743896, 'timestamp': '2025-09-30 22:37:44.384918', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.441417', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.08558467030525208, 'timestamp': '2025-09-30 22:37:44.444969', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:44.503380', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.08537165075540543, 'timestamp': '2025-09-30 22:37:44.507293', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:44.584133', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.0854579284787178, 'timestamp': '2025-09-30 22:37:44.596644', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.652628', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.108624167740345, 'timestamp': '2025-09-30 22:37:44.657123', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.713902', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.07207439094781876, 'timestamp': '2025-09-30 22:37:44.718231', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.775725', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.12159361690282822, 'timestamp': '2025-09-30 22:37:44.786567', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:44.883168', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.09591365605592728, 'timestamp': '2025-09-30 22:37:44.890052', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:44.952940', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.07504038512706757, 'timestamp': '2025-09-30 22:37:44.956483', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.023342', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.12259429693222046, 'timestamp': '2025-09-30 22:37:45.026124', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.084516', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.15268702805042267, 'timestamp': '2025-09-30 22:37:45.087748', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.151844', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.16500891745090485, 'timestamp': '2025-09-30 22:37:45.159177', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:45.223289', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.07523000985383987, 'timestamp': '2025-09-30 22:37:45.227325', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:45.290112', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.17172688245773315, 'timestamp': '2025-09-30 22:37:45.292887', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.351803', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.0819922387599945, 'timestamp': '2025-09-30 22:37:45.355572', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.417854', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.06248685345053673, 'timestamp': '2025-09-30 22:37:45.424200', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.488666', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.0988766998052597, 'timestamp': '2025-09-30 22:37:45.498372', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:45.559011', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.09863033145666122, 'timestamp': '2025-09-30 22:37:45.562786', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.620335', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.18696951866149902, 'timestamp': '2025-09-30 22:37:45.629621', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:45.693676', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.039291296154260635, 'timestamp': '2025-09-30 22:37:45.700576', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:45.763373', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.07899574935436249, 'timestamp': '2025-09-30 22:37:45.766857', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:45.823458', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.09193141013383865, 'timestamp': '2025-09-30 22:37:45.827461', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:45.884924', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.11981481313705444, 'timestamp': '2025-09-30 22:37:45.888627', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:45.949469', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.0868772566318512, 'timestamp': '2025-09-30 22:37:45.956539', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:46.026680', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.11729592084884644, 'timestamp': '2025-09-30 22:37:46.029557', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.097876', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.07287333160638809, 'timestamp': '2025-09-30 22:37:46.100392', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.162168', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.06326834857463837, 'timestamp': '2025-09-30 22:37:46.165207', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:46.224257', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.09908881038427353, 'timestamp': '2025-09-30 22:37:46.231200', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.292696', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.11534532904624939, 'timestamp': '2025-09-30 22:37:46.295906', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:46.352298', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.11965832114219666, 'timestamp': '2025-09-30 22:37:46.355503', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:46.429000', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.09049364924430847, 'timestamp': '2025-09-30 22:37:46.437251', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:46.507350', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.1150939017534256, 'timestamp': '2025-09-30 22:37:46.518721', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:46.585751', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.0994182676076889, 'timestamp': '2025-09-30 22:37:46.594153', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.660619', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.08410868793725967, 'timestamp': '2025-09-30 22:37:46.667233', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:46.732712', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.1218050867319107, 'timestamp': '2025-09-30 22:37:46.736261', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.799522', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.10139557719230652, 'timestamp': '2025-09-30 22:37:46.807538', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:46.863576', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.06699956208467484, 'timestamp': '2025-09-30 22:37:46.866619', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.932274', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.13905318081378937, 'timestamp': '2025-09-30 22:37:46.939589', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:46.998822', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.16876062750816345, 'timestamp': '2025-09-30 22:37:47.001836', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:47.059344', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.07945048809051514, 'timestamp': '2025-09-30 22:37:47.066445', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:47.123113', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.11165311187505722, 'timestamp': '2025-09-30 22:37:47.130522', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:47.193231', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.12351180613040924, 'timestamp': '2025-09-30 22:37:47.197226', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:47.271149', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.14521032571792603, 'timestamp': '2025-09-30 22:37:47.274974', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:47.332864', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.05596192181110382, 'timestamp': '2025-09-30 22:37:47.340112', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.397767', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.10010921210050583, 'timestamp': '2025-09-30 22:37:47.401846', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:47.460630', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.07964549213647842, 'timestamp': '2025-09-30 22:37:47.464567', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:47.527048', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.09948240220546722, 'timestamp': '2025-09-30 22:37:47.533297', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.592752', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.1290847510099411, 'timestamp': '2025-09-30 22:37:47.599415', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.663965', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.06482058763504028, 'timestamp': '2025-09-30 22:37:47.667122', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.725412', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.08038272708654404, 'timestamp': '2025-09-30 22:37:47.728920', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.787778', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.0974380224943161, 'timestamp': '2025-09-30 22:37:47.790542', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:47.853061', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.043620869517326355, 'timestamp': '2025-09-30 22:37:47.859559', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.915989', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.16891731321811676, 'timestamp': '2025-09-30 22:37:47.919567', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:47.982592', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.10795770585536957, 'timestamp': '2025-09-30 22:37:47.985661', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:48.043774', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.138835147023201, 'timestamp': '2025-09-30 22:37:48.046590', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:48.104858', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.12250509858131409, 'timestamp': '2025-09-30 22:37:48.115264', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:48.172245', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.12757568061351776, 'timestamp': '2025-09-30 22:37:48.175213', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:48.234394', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.17669008672237396, 'timestamp': '2025-09-30 22:37:48.237739', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:48.298225', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.12529869377613068, 'timestamp': '2025-09-30 22:37:48.305481', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:48.366561', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.0858030617237091, 'timestamp': '2025-09-30 22:37:48.374127', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:48.436743', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.05726642906665802, 'timestamp': '2025-09-30 22:37:48.440749', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:48.499048', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.06390408426523209, 'timestamp': '2025-09-30 22:37:48.502178', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:48.559981', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.10394053161144257, 'timestamp': '2025-09-30 22:37:48.563510', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:48.629137', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.08082130551338196, 'timestamp': '2025-09-30 22:37:48.641158', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:48.703943', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.1037902757525444, 'timestamp': '2025-09-30 22:37:48.712815', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:48.777570', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.09936995804309845, 'timestamp': '2025-09-30 22:37:48.781483', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:48.852801', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.08787175267934799, 'timestamp': '2025-09-30 22:37:48.861902', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:48.922998', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.11302272230386734, 'timestamp': '2025-09-30 22:37:48.930672', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:48.990701', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.05499623343348503, 'timestamp': '2025-09-30 22:37:48.997078', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:49.059554', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.07712561637163162, 'timestamp': '2025-09-30 22:37:49.070125', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:49.128047', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.09216532856225967, 'timestamp': '2025-09-30 22:37:49.131252', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:49.196626', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.03609982505440712, 'timestamp': '2025-09-30 22:37:49.206663', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:49.271520', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.11843400448560715, 'timestamp': '2025-09-30 22:37:49.288014', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:37:49.347154', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.08352880924940109, 'timestamp': '2025-09-30 22:37:49.352743', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:49.413270', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.06706611067056656, 'timestamp': '2025-09-30 22:37:49.430555', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:49.513970', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.08555371314287186, 'timestamp': '2025-09-30 22:37:49.522395', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:49.586405', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.1612476110458374, 'timestamp': '2025-09-30 22:37:49.590639', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:49.653537', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.08184833824634552, 'timestamp': '2025-09-30 22:37:49.656305', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:49.720033', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.10670142620801926, 'timestamp': '2025-09-30 22:37:49.723149', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:49.787857', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.0817834883928299, 'timestamp': '2025-09-30 22:37:49.795906', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:49.853429', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.09136629104614258, 'timestamp': '2025-09-30 22:37:49.856847', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:49.914411', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.04879565164446831, 'timestamp': '2025-09-30 22:37:49.918401', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:49.980262', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.08924051374197006, 'timestamp': '2025-09-30 22:37:49.987353', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:50.049118', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.15687617659568787, 'timestamp': '2025-09-30 22:37:50.056864', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:50.113339', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.11018017679452896, 'timestamp': '2025-09-30 22:37:50.117697', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:37:50.181502', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.0967431515455246, 'timestamp': '2025-09-30 22:37:50.184355', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:50.256781', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.15760457515716553, 'timestamp': '2025-09-30 22:37:50.261224', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:50.327940', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.07490348070859909, 'timestamp': '2025-09-30 22:37:50.335211', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:50.410750', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.050752051174640656, 'timestamp': '2025-09-30 22:37:50.417130', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:50.491524', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.03587254136800766, 'timestamp': '2025-09-30 22:37:50.498833', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:50.562219', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.1059342622756958, 'timestamp': '2025-09-30 22:37:50.566910', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:50.628528', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.03949800133705139, 'timestamp': '2025-09-30 22:37:50.637518', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:50.722910', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.11906949430704117, 'timestamp': '2025-09-30 22:37:50.726264', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:50.796271', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.08097881078720093, 'timestamp': '2025-09-30 22:37:50.799184', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:50.856741', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.04697311297059059, 'timestamp': '2025-09-30 22:37:50.867647', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:50.927362', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.0628037229180336, 'timestamp': '2025-09-30 22:37:50.937342', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:50.998820', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.14343860745429993, 'timestamp': '2025-09-30 22:37:51.001679', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.064240', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.07541678845882416, 'timestamp': '2025-09-30 22:37:51.067182', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:51.133364', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.09809146821498871, 'timestamp': '2025-09-30 22:37:51.136881', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.195526', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.15415653586387634, 'timestamp': '2025-09-30 22:37:51.202599', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.266769', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.17032572627067566, 'timestamp': '2025-09-30 22:37:51.269949', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:51.330803', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.07333866506814957, 'timestamp': '2025-09-30 22:37:51.333811', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.391705', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.10641808807849884, 'timestamp': '2025-09-30 22:37:51.395138', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:51.453860', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.04476116597652435, 'timestamp': '2025-09-30 22:37:51.460571', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.520070', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.10262317955493927, 'timestamp': '2025-09-30 22:37:51.522442', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.579258', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.12709644436836243, 'timestamp': '2025-09-30 22:37:51.586968', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.649190', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.17387567460536957, 'timestamp': '2025-09-30 22:37:51.654533', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:51.715551', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.06880415976047516, 'timestamp': '2025-09-30 22:37:51.727541', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:51.784531', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.12867622077465057, 'timestamp': '2025-09-30 22:37:51.789418', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:51.848624', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.16089265048503876, 'timestamp': '2025-09-30 22:37:51.851483', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:51.911404', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.17598459124565125, 'timestamp': '2025-09-30 22:37:51.913995', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:51.975266', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.14602060616016388, 'timestamp': '2025-09-30 22:37:51.984046', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:52.044502', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.09072363376617432, 'timestamp': '2025-09-30 22:37:52.047857', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:52.106766', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.06258062273263931, 'timestamp': '2025-09-30 22:37:52.112876', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:52.171050', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.09363174438476562, 'timestamp': '2025-09-30 22:37:52.181288', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:52.240599', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.12436521798372269, 'timestamp': '2025-09-30 22:37:52.247834', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:52.308826', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.0397789441049099, 'timestamp': '2025-09-30 22:37:52.312994', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:52.375298', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.07053793966770172, 'timestamp': '2025-09-30 22:37:52.378881', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:52.441813', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.10972762852907181, 'timestamp': '2025-09-30 22:37:52.444928', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:52.507136', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.057052332907915115, 'timestamp': '2025-09-30 22:37:52.514138', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:52.573865', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.0635809451341629, 'timestamp': '2025-09-30 22:37:52.577515', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:52.649133', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.09719513356685638, 'timestamp': '2025-09-30 22:37:52.652225', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:52.709748', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.10209984332323074, 'timestamp': '2025-09-30 22:37:52.714256', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:52.773252', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.08728764206171036, 'timestamp': '2025-09-30 22:37:52.780879', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:52.859108', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.02021564356982708, 'timestamp': '2025-09-30 22:37:52.863118', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:52.923197', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.18395766615867615, 'timestamp': '2025-09-30 22:37:52.926072', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:52.989088', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.06350845098495483, 'timestamp': '2025-09-30 22:37:52.992916', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.058865', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.15702120959758759, 'timestamp': '2025-09-30 22:37:53.066527', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:37:53.131852', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.0805838480591774, 'timestamp': '2025-09-30 22:37:53.136034', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.204126', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.21806088089942932, 'timestamp': '2025-09-30 22:37:53.207040', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:53.266340', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.10410737246274948, 'timestamp': '2025-09-30 22:37:53.270226', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.330141', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.05566519498825073, 'timestamp': '2025-09-30 22:37:53.340116', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:53.398055', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.08039312809705734, 'timestamp': '2025-09-30 22:37:53.402137', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:53.465908', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.0847243219614029, 'timestamp': '2025-09-30 22:37:53.471990', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.531323', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.11849061399698257, 'timestamp': '2025-09-30 22:37:53.535846', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.594021', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.05031890794634819, 'timestamp': '2025-09-30 22:37:53.602773', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:53.671766', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.11146549880504608, 'timestamp': '2025-09-30 22:37:53.683758', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:37:53.745466', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.11003392189741135, 'timestamp': '2025-09-30 22:37:53.751652', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:37:53.811747', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.05038060247898102, 'timestamp': '2025-09-30 22:37:53.815061', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:37:53.875475', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.0764104500412941, 'timestamp': '2025-09-30 22:37:53.882694', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:53.940835', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.0958692729473114, 'timestamp': '2025-09-30 22:37:53.944400', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:37:54.003995', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.10186144709587097, 'timestamp': '2025-09-30 22:37:54.016159', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:38:08.754635', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 9717.764895144477, 'timestamp': '2025-09-30 22:38:08.758870', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:08.817573', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.10888997465372086, 'timestamp': '2025-09-30 22:38:08.826002', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:08.894420', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.035448987036943436, 'timestamp': '2025-09-30 22:38:08.912026', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:08.973881', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.06940960139036179, 'timestamp': '2025-09-30 22:38:08.983561', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:09.042080', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.1281231790781021, 'timestamp': '2025-09-30 22:38:09.046431', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:09.106967', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.06515459716320038, 'timestamp': '2025-09-30 22:38:09.110384', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:09.167601', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.17054495215415955, 'timestamp': '2025-09-30 22:38:09.176491', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:09.234549', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.0960087701678276, 'timestamp': '2025-09-30 22:38:09.238832', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:09.305050', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.034954898059368134, 'timestamp': '2025-09-30 22:38:09.308932', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:09.376937', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.08841264992952347, 'timestamp': '2025-09-30 22:38:09.381161', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:09.439948', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.09936051815748215, 'timestamp': '2025-09-30 22:38:09.449719', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:09.529265', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.11649557948112488, 'timestamp': '2025-09-30 22:38:09.541887', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:09.608287', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.0705014243721962, 'timestamp': '2025-09-30 22:38:09.614467', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:09.679155', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.03573883697390556, 'timestamp': '2025-09-30 22:38:09.695865', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:09.755220', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.09703448414802551, 'timestamp': '2025-09-30 22:38:09.765520', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:09.834818', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.08532225340604782, 'timestamp': '2025-09-30 22:38:09.840179', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:09.906242', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.07097572088241577, 'timestamp': '2025-09-30 22:38:09.910401', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:09.974631', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.11409511417150497, 'timestamp': '2025-09-30 22:38:09.978526', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:10.060530', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.156987264752388, 'timestamp': '2025-09-30 22:38:10.069039', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:10.127520', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.0674854964017868, 'timestamp': '2025-09-30 22:38:10.136975', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:10.220014', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.07100371271371841, 'timestamp': '2025-09-30 22:38:10.249603', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:10.336581', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.0985783115029335, 'timestamp': '2025-09-30 22:38:10.371751', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:10.438272', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.052717361599206924, 'timestamp': '2025-09-30 22:38:10.479689', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:10.574867', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.15882369875907898, 'timestamp': '2025-09-30 22:38:10.612238', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:10.695353', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.0869838073849678, 'timestamp': '2025-09-30 22:38:10.722159', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:10.815258', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.08000706881284714, 'timestamp': '2025-09-30 22:38:10.823465', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:10.895690', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.07604417949914932, 'timestamp': '2025-09-30 22:38:10.913855', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:10.980388', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.19460001587867737, 'timestamp': '2025-09-30 22:38:10.998736', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:11.068569', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.1549268215894699, 'timestamp': '2025-09-30 22:38:11.079418', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:11.143088', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.09036248922348022, 'timestamp': '2025-09-30 22:38:11.156448', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:11.229212', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.17807067930698395, 'timestamp': '2025-09-30 22:38:11.252366', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:11.326112', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.06459063291549683, 'timestamp': '2025-09-30 22:38:11.338865', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:11.409912', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.16902226209640503, 'timestamp': '2025-09-30 22:38:11.417003', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:11.497668', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.08474931120872498, 'timestamp': '2025-09-30 22:38:11.511573', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:11.600924', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.13502734899520874, 'timestamp': '2025-09-30 22:38:11.614947', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:11.706407', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.10186590999364853, 'timestamp': '2025-09-30 22:38:11.722840', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:11.794173', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.1795070469379425, 'timestamp': '2025-09-30 22:38:11.812256', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:11.892016', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.05978047102689743, 'timestamp': '2025-09-30 22:38:11.899547', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:11.977617', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.0798640176653862, 'timestamp': '2025-09-30 22:38:11.995851', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:12.077790', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.03547738119959831, 'timestamp': '2025-09-30 22:38:12.081695', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:12.150103', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.12455448508262634, 'timestamp': '2025-09-30 22:38:12.166423', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:12.256146', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.10641982406377792, 'timestamp': '2025-09-30 22:38:12.266030', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:12.369475', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.14045193791389465, 'timestamp': '2025-09-30 22:38:12.391070', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:12.452447', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.07205615192651749, 'timestamp': '2025-09-30 22:38:12.466619', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:12.536866', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.135825514793396, 'timestamp': '2025-09-30 22:38:12.541365', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:12.630970', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.047051407396793365, 'timestamp': '2025-09-30 22:38:12.643825', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:12.731712', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.04940573498606682, 'timestamp': '2025-09-30 22:38:12.750735', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:12.827507', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.0893753170967102, 'timestamp': '2025-09-30 22:38:12.838628', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:12.911789', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.08400698006153107, 'timestamp': '2025-09-30 22:38:12.923764', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:12.982332', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.08009742945432663, 'timestamp': '2025-09-30 22:38:12.991956', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:13.048820', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.0945364460349083, 'timestamp': '2025-09-30 22:38:13.056885', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:13.115926', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.058036934584379196, 'timestamp': '2025-09-30 22:38:13.119142', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.179215', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.1542334109544754, 'timestamp': '2025-09-30 22:38:13.184368', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:13.248295', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.0997777059674263, 'timestamp': '2025-09-30 22:38:13.260568', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.319866', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.1658310890197754, 'timestamp': '2025-09-30 22:38:13.336343', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:13.395794', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.21099324524402618, 'timestamp': '2025-09-30 22:38:13.404566', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:13.462139', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.062018994241952896, 'timestamp': '2025-09-30 22:38:13.475536', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:13.541845', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.08699306845664978, 'timestamp': '2025-09-30 22:38:13.546171', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.605591', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.12142665684223175, 'timestamp': '2025-09-30 22:38:13.613590', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.672117', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.0854676216840744, 'timestamp': '2025-09-30 22:38:13.683907', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:13.742061', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.11048383265733719, 'timestamp': '2025-09-30 22:38:13.754352', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.824109', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.11041654646396637, 'timestamp': '2025-09-30 22:38:13.827933', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.896985', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.08053798228502274, 'timestamp': '2025-09-30 22:38:13.907210', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:13.965771', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.10152622312307358, 'timestamp': '2025-09-30 22:38:13.971560', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.033194', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.031823381781578064, 'timestamp': '2025-09-30 22:38:14.040577', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.102127', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.16874200105667114, 'timestamp': '2025-09-30 22:38:14.105737', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.163305', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.027423694729804993, 'timestamp': '2025-09-30 22:38:14.173064', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.231715', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.049148231744766235, 'timestamp': '2025-09-30 22:38:14.244497', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.315702', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.057754285633563995, 'timestamp': '2025-09-30 22:38:14.319329', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.378796', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.08402245491743088, 'timestamp': '2025-09-30 22:38:14.381635', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.442365', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.09720708429813385, 'timestamp': '2025-09-30 22:38:14.450424', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:14.516087', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.08958891034126282, 'timestamp': '2025-09-30 22:38:14.520611', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.580407', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.11565206944942474, 'timestamp': '2025-09-30 22:38:14.584206', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.642988', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.16908805072307587, 'timestamp': '2025-09-30 22:38:14.645938', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.704982', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.13368527591228485, 'timestamp': '2025-09-30 22:38:14.711773', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:14.769676', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.08803954720497131, 'timestamp': '2025-09-30 22:38:14.773927', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.835521', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.08875849097967148, 'timestamp': '2025-09-30 22:38:14.839854', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:14.902662', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.06579296290874481, 'timestamp': '2025-09-30 22:38:14.916427', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:14.974565', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.1871948093175888, 'timestamp': '2025-09-30 22:38:14.982924', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:15.042251', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.1823147088289261, 'timestamp': '2025-09-30 22:38:15.046371', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:15.116547', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.134416863322258, 'timestamp': '2025-09-30 22:38:15.119076', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:15.178886', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.06692567467689514, 'timestamp': '2025-09-30 22:38:15.181867', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:15.247723', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.06398816406726837, 'timestamp': '2025-09-30 22:38:15.262253', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:15.327655', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.059473033994436264, 'timestamp': '2025-09-30 22:38:15.332437', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:15.401581', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.08416562527418137, 'timestamp': '2025-09-30 22:38:15.405757', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:15.487243', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.06761530786752701, 'timestamp': '2025-09-30 22:38:15.493437', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:15.572322', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.06549501419067383, 'timestamp': '2025-09-30 22:38:15.592010', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:15.651869', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.189100444316864, 'timestamp': '2025-09-30 22:38:15.656656', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:15.715639', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.0319521501660347, 'timestamp': '2025-09-30 22:38:15.720638', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:15.800616', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.1187509074807167, 'timestamp': '2025-09-30 22:38:15.803427', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:15.861610', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.11697995662689209, 'timestamp': '2025-09-30 22:38:15.875859', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:15.933467', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.10301296412944794, 'timestamp': '2025-09-30 22:38:15.938225', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:16.015723', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.11209199577569962, 'timestamp': '2025-09-30 22:38:16.023501', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.107710', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.0713205561041832, 'timestamp': '2025-09-30 22:38:16.116965', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:16.188597', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.050654638558626175, 'timestamp': '2025-09-30 22:38:16.196273', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:16.264204', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.08485520631074905, 'timestamp': '2025-09-30 22:38:16.268522', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:16.328697', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.06906875967979431, 'timestamp': '2025-09-30 22:38:16.332772', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.389235', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.05629616230726242, 'timestamp': '2025-09-30 22:38:16.394019', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.458456', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.14566102623939514, 'timestamp': '2025-09-30 22:38:16.477770', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:16.537106', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.1190185695886612, 'timestamp': '2025-09-30 22:38:16.552963', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.623512', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.12592211365699768, 'timestamp': '2025-09-30 22:38:16.629179', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.689200', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.10267148166894913, 'timestamp': '2025-09-30 22:38:16.702573', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:16.761617', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.0712215006351471, 'timestamp': '2025-09-30 22:38:16.769200', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:16.829693', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.0988595262169838, 'timestamp': '2025-09-30 22:38:16.834297', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:16.894587', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.04326877370476723, 'timestamp': '2025-09-30 22:38:16.898866', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:16.958439', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.07794724404811859, 'timestamp': '2025-09-30 22:38:16.973540', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.032846', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.22580856084823608, 'timestamp': '2025-09-30 22:38:17.041137', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:17.100872', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.09640084952116013, 'timestamp': '2025-09-30 22:38:17.105579', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:17.168224', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.07884705066680908, 'timestamp': '2025-09-30 22:38:17.173025', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:17.238557', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.026686035096645355, 'timestamp': '2025-09-30 22:38:17.241318', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.300927', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.116731196641922, 'timestamp': '2025-09-30 22:38:17.319394', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.378453', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.1488543152809143, 'timestamp': '2025-09-30 22:38:17.382172', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.449588', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.12359116226434708, 'timestamp': '2025-09-30 22:38:17.453873', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.514248', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.11234134435653687, 'timestamp': '2025-09-30 22:38:17.519770', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:17.589873', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.06221596896648407, 'timestamp': '2025-09-30 22:38:17.597937', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:17.658840', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.13089533150196075, 'timestamp': '2025-09-30 22:38:17.666400', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.724621', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.1282370686531067, 'timestamp': '2025-09-30 22:38:17.728682', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:17.790867', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.03520935773849487, 'timestamp': '2025-09-30 22:38:17.794887', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.852989', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.07801859080791473, 'timestamp': '2025-09-30 22:38:17.861981', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:17.922219', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.1075320914387703, 'timestamp': '2025-09-30 22:38:17.926555', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:17.994951', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.0937415137887001, 'timestamp': '2025-09-30 22:38:17.999905', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:38:18.073904', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.03872634097933769, 'timestamp': '2025-09-30 22:38:18.077360', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.137116', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.09235262125730515, 'timestamp': '2025-09-30 22:38:18.145978', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.205358', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.1100117489695549, 'timestamp': '2025-09-30 22:38:18.209632', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.282223', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.1955786645412445, 'timestamp': '2025-09-30 22:38:18.286974', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:18.359413', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.03555253893136978, 'timestamp': '2025-09-30 22:38:18.363811', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.424249', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.11833807826042175, 'timestamp': '2025-09-30 22:38:18.432803', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.494524', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.08979654312133789, 'timestamp': '2025-09-30 22:38:18.499136', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.572412', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.09361355006694794, 'timestamp': '2025-09-30 22:38:18.578354', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:18.651224', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.19196389615535736, 'timestamp': '2025-09-30 22:38:18.668050', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.726737', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.06496252864599228, 'timestamp': '2025-09-30 22:38:18.735118', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:18.796148', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.013359187170863152, 'timestamp': '2025-09-30 22:38:18.803821', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:18.865098', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.060747552663087845, 'timestamp': '2025-09-30 22:38:18.868667', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:18.927873', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.08114977926015854, 'timestamp': '2025-09-30 22:38:18.932236', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:19.010881', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.08477234840393066, 'timestamp': '2025-09-30 22:38:19.019344', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:19.079806', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.11964475363492966, 'timestamp': '2025-09-30 22:38:19.085508', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:19.154304', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.11524944752454758, 'timestamp': '2025-09-30 22:38:19.158653', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:19.218756', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.10553275793790817, 'timestamp': '2025-09-30 22:38:19.222924', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:19.294385', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.07864682376384735, 'timestamp': '2025-09-30 22:38:19.303239', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:19.373527', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.09218848496675491, 'timestamp': '2025-09-30 22:38:19.376803', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:19.436689', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.04294126108288765, 'timestamp': '2025-09-30 22:38:19.442477', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:19.502373', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.07368989288806915, 'timestamp': '2025-09-30 22:38:19.506272', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:19.568406', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.11706560105085373, 'timestamp': '2025-09-30 22:38:19.578183', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:19.651083', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.07863279432058334, 'timestamp': '2025-09-30 22:38:19.656566', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:19.724669', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.059199463576078415, 'timestamp': '2025-09-30 22:38:19.743794', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:19.812465', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.15935863554477692, 'timestamp': '2025-09-30 22:38:19.817307', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:19.884527', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.06118694692850113, 'timestamp': '2025-09-30 22:38:19.905515', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:19.963924', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.06685464829206467, 'timestamp': '2025-09-30 22:38:19.981139', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:20.052372', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.20597830414772034, 'timestamp': '2025-09-30 22:38:20.068805', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.127310', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.06827396154403687, 'timestamp': '2025-09-30 22:38:20.142195', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:20.200009', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.08300775289535522, 'timestamp': '2025-09-30 22:38:20.209237', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.267887', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.16315145790576935, 'timestamp': '2025-09-30 22:38:20.273479', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:20.330847', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.08535731583833694, 'timestamp': '2025-09-30 22:38:20.335182', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.405666', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.061754632741212845, 'timestamp': '2025-09-30 22:38:20.411212', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.469566', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.0645303875207901, 'timestamp': '2025-09-30 22:38:20.490727', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.551861', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.180544912815094, 'timestamp': '2025-09-30 22:38:20.556678', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:20.614821', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.04257157817482948, 'timestamp': '2025-09-30 22:38:20.621314', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:20.680354', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.072109654545784, 'timestamp': '2025-09-30 22:38:20.685337', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:20.744425', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.09177835285663605, 'timestamp': '2025-09-30 22:38:20.752301', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:20.825328', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.10748612880706787, 'timestamp': '2025-09-30 22:38:20.830174', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:20.889192', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.10238654166460037, 'timestamp': '2025-09-30 22:38:20.894182', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:20.954379', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.08364340662956238, 'timestamp': '2025-09-30 22:38:20.959098', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.016689', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.0162891186773777, 'timestamp': '2025-09-30 22:38:21.025485', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.083331', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.15355339646339417, 'timestamp': '2025-09-30 22:38:21.087059', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:21.145349', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.0889553353190422, 'timestamp': '2025-09-30 22:38:21.161291', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.221300', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.14847344160079956, 'timestamp': '2025-09-30 22:38:21.237349', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:21.299369', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.13170014321804047, 'timestamp': '2025-09-30 22:38:21.306853', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.370522', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.07690923660993576, 'timestamp': '2025-09-30 22:38:21.389104', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.449349', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.058184102177619934, 'timestamp': '2025-09-30 22:38:21.467573', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:21.550062', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.08123424649238586, 'timestamp': '2025-09-30 22:38:21.555862', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.624211', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.11672573536634445, 'timestamp': '2025-09-30 22:38:21.642975', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.702160', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.08295505493879318, 'timestamp': '2025-09-30 22:38:21.707770', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:21.768882', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.048136644065380096, 'timestamp': '2025-09-30 22:38:21.774753', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:21.834535', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.09364005923271179, 'timestamp': '2025-09-30 22:38:21.840584', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:21.908682', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.08217918872833252, 'timestamp': '2025-09-30 22:38:21.917221', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:21.982069', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.07974673062562943, 'timestamp': '2025-09-30 22:38:21.987189', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:22.046102', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.14351511001586914, 'timestamp': '2025-09-30 22:38:22.062688', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:22.129221', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.05583978444337845, 'timestamp': '2025-09-30 22:38:22.132693', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:22.205461', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.06676992774009705, 'timestamp': '2025-09-30 22:38:22.215085', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:22.277034', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.13524436950683594, 'timestamp': '2025-09-30 22:38:22.292143', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:22.367730', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.13292241096496582, 'timestamp': '2025-09-30 22:38:22.372760', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:22.431238', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.07263407856225967, 'timestamp': '2025-09-30 22:38:22.435726', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:22.499669', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.10559996962547302, 'timestamp': '2025-09-30 22:38:22.521954', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:22.581168', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.07230521738529205, 'timestamp': '2025-09-30 22:38:22.586885', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:38:22.656712', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.07755367457866669, 'timestamp': '2025-09-30 22:38:22.662193', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:22.729006', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.08703232556581497, 'timestamp': '2025-09-30 22:38:22.732751', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:22.792514', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.13419726490974426, 'timestamp': '2025-09-30 22:38:22.800692', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:22.865128', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.08481669425964355, 'timestamp': '2025-09-30 22:38:22.868493', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:22.927135', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.12389453500509262, 'timestamp': '2025-09-30 22:38:22.931802', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:23.001532', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.09220211952924728, 'timestamp': '2025-09-30 22:38:23.005307', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:23.065066', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.07308214902877808, 'timestamp': '2025-09-30 22:38:23.086359', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-09-30 22:38:23.605466', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:23.664402', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.09095943719148636, 'timestamp': '2025-09-30 22:38:23.676204', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:23.734190', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.13675470650196075, 'timestamp': '2025-09-30 22:38:23.748058', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:23.812653', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.13027846813201904, 'timestamp': '2025-09-30 22:38:23.817648', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:23.876224', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.08402644097805023, 'timestamp': '2025-09-30 22:38:23.883209', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:23.955058', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.09198393672704697, 'timestamp': '2025-09-30 22:38:23.959766', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.033165', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.07532006502151489, 'timestamp': '2025-09-30 22:38:24.049195', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:24.121591', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.14314036071300507, 'timestamp': '2025-09-30 22:38:24.126860', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:24.199983', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.08090202510356903, 'timestamp': '2025-09-30 22:38:24.207392', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:24.265184', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.07380889356136322, 'timestamp': '2025-09-30 22:38:24.270300', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:24.332402', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.14855612814426422, 'timestamp': '2025-09-30 22:38:24.346744', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:24.412711', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.08772120624780655, 'timestamp': '2025-09-30 22:38:24.416482', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:24.487592', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.18272723257541656, 'timestamp': '2025-09-30 22:38:24.495162', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.554864', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.10099294036626816, 'timestamp': '2025-09-30 22:38:24.559257', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.619161', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.1738925278186798, 'timestamp': '2025-09-30 22:38:24.622423', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:24.688467', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.06805794686079025, 'timestamp': '2025-09-30 22:38:24.692274', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:24.753624', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.101251520216465, 'timestamp': '2025-09-30 22:38:24.761899', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.840119', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.09449563920497894, 'timestamp': '2025-09-30 22:38:24.843455', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.914724', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.06409421563148499, 'timestamp': '2025-09-30 22:38:24.927555', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:24.991951', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.08943001925945282, 'timestamp': '2025-09-30 22:38:24.995805', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:25.053530', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.10260248184204102, 'timestamp': '2025-09-30 22:38:25.061462', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.123676', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.09502343088388443, 'timestamp': '2025-09-30 22:38:25.128215', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.188263', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.11202085763216019, 'timestamp': '2025-09-30 22:38:25.198472', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:38:25.259236', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.08869999647140503, 'timestamp': '2025-09-30 22:38:25.265123', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.327251', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.11867958307266235, 'timestamp': '2025-09-30 22:38:25.335346', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:25.396367', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.03035770170390606, 'timestamp': '2025-09-30 22:38:25.400519', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:25.474600', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.05614681914448738, 'timestamp': '2025-09-30 22:38:25.480108', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:25.547413', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.031739626079797745, 'timestamp': '2025-09-30 22:38:25.551525', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.613682', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.12158430367708206, 'timestamp': '2025-09-30 22:38:25.632944', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.691376', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.09420677274465561, 'timestamp': '2025-09-30 22:38:25.708171', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:25.769539', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.08565600961446762, 'timestamp': '2025-09-30 22:38:25.772482', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:25.837162', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.14004448056221008, 'timestamp': '2025-09-30 22:38:25.841351', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:25.911527', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.07182107865810394, 'timestamp': '2025-09-30 22:38:25.930765', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:25.995766', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.1029098629951477, 'timestamp': '2025-09-30 22:38:26.001535', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:26.062560', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.07579277455806732, 'timestamp': '2025-09-30 22:38:26.068373', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.127963', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.22867777943611145, 'timestamp': '2025-09-30 22:38:26.132307', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.205513', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.08952931314706802, 'timestamp': '2025-09-30 22:38:26.223000', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.282058', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.11103277653455734, 'timestamp': '2025-09-30 22:38:26.286283', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.356264', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.13894304633140564, 'timestamp': '2025-09-30 22:38:26.362011', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:26.422296', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.07789146155118942, 'timestamp': '2025-09-30 22:38:26.428288', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:26.517775', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.07623212784528732, 'timestamp': '2025-09-30 22:38:26.525602', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:26.589281', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.12428189069032669, 'timestamp': '2025-09-30 22:38:26.593753', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:26.652697', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.07975387573242188, 'timestamp': '2025-09-30 22:38:26.655743', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:26.725657', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.132796049118042, 'timestamp': '2025-09-30 22:38:26.739335', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.799485', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.1373620331287384, 'timestamp': '2025-09-30 22:38:26.807455', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.867290', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.06303484737873077, 'timestamp': '2025-09-30 22:38:26.871982', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:26.935248', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.09317706525325775, 'timestamp': '2025-09-30 22:38:26.939274', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:27.008734', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.09532883018255234, 'timestamp': '2025-09-30 22:38:27.026517', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:27.088455', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.07293056696653366, 'timestamp': '2025-09-30 22:38:27.096003', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:27.176677', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.08042820543050766, 'timestamp': '2025-09-30 22:38:27.181614', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:27.249800', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.156650573015213, 'timestamp': '2025-09-30 22:38:27.255019', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:27.321482', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.06059933453798294, 'timestamp': '2025-09-30 22:38:27.327091', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:27.387003', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.10055384784936905, 'timestamp': '2025-09-30 22:38:27.397605', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:27.457153', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.08566487580537796, 'timestamp': '2025-09-30 22:38:27.468498', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:27.534931', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.13914380967617035, 'timestamp': '2025-09-30 22:38:27.540405', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:27.603259', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.09294825047254562, 'timestamp': '2025-09-30 22:38:27.607326', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:27.668182', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.09261290729045868, 'timestamp': '2025-09-30 22:38:27.675422', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:27.733117', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.15628327429294586, 'timestamp': '2025-09-30 22:38:27.737171', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:27.810611', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.1580187976360321, 'timestamp': '2025-09-30 22:38:27.833905', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:27.901628', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.14097242057323456, 'timestamp': '2025-09-30 22:38:27.906749', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:27.964603', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.14482010900974274, 'timestamp': '2025-09-30 22:38:27.987357', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:28.046905', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.1418916881084442, 'timestamp': '2025-09-30 22:38:28.058688', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:28.129221', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.08229526877403259, 'timestamp': '2025-09-30 22:38:28.146444', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.207639', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.1159692034125328, 'timestamp': '2025-09-30 22:38:28.211394', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:28.270409', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.1455458253622055, 'timestamp': '2025-09-30 22:38:28.277951', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:28.335135', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.0679963082075119, 'timestamp': '2025-09-30 22:38:28.340971', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.404585', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.06664074957370758, 'timestamp': '2025-09-30 22:38:28.411457', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.470553', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.04228464141488075, 'timestamp': '2025-09-30 22:38:28.488395', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:28.562948', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.06474092602729797, 'timestamp': '2025-09-30 22:38:28.570838', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.630261', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.09634732455015182, 'timestamp': '2025-09-30 22:38:28.648256', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.707181', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.11388233304023743, 'timestamp': '2025-09-30 22:38:28.711251', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.770253', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.08777976036071777, 'timestamp': '2025-09-30 22:38:28.774108', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.835233', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.022226421162486076, 'timestamp': '2025-09-30 22:38:28.842324', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:28.905084', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.13090306520462036, 'timestamp': '2025-09-30 22:38:28.910052', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:28.985603', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.0481325201690197, 'timestamp': '2025-09-30 22:38:28.990286', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.050585', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.1746688038110733, 'timestamp': '2025-09-30 22:38:29.055100', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:29.125998', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.09914597868919373, 'timestamp': '2025-09-30 22:38:29.145620', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:29.203531', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.05408051609992981, 'timestamp': '2025-09-30 22:38:29.207878', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:29.276278', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.07292336225509644, 'timestamp': '2025-09-30 22:38:29.280083', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.343731', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.08987652510404587, 'timestamp': '2025-09-30 22:38:29.347395', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.405577', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.10428847372531891, 'timestamp': '2025-09-30 22:38:29.424222', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:29.482872', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.04125617817044258, 'timestamp': '2025-09-30 22:38:29.489539', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:29.561255', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.09166017919778824, 'timestamp': '2025-09-30 22:38:29.566505', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.629064', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.08105459064245224, 'timestamp': '2025-09-30 22:38:29.633589', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:29.692747', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.07626030594110489, 'timestamp': '2025-09-30 22:38:29.699338', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:29.763740', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.08393935859203339, 'timestamp': '2025-09-30 22:38:29.768861', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.827291', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.056667692959308624, 'timestamp': '2025-09-30 22:38:29.832499', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:29.893435', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.11906199157238007, 'timestamp': '2025-09-30 22:38:29.896921', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:29.956912', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.12481527775526047, 'timestamp': '2025-09-30 22:38:29.965066', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:30.023270', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.03967926651239395, 'timestamp': '2025-09-30 22:38:30.027196', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:30.085724', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.08891531825065613, 'timestamp': '2025-09-30 22:38:30.089949', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:30.160435', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.12985505163669586, 'timestamp': '2025-09-30 22:38:30.164991', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:30.223437', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.11025978624820709, 'timestamp': '2025-09-30 22:38:30.231295', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:30.299676', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.06395582854747772, 'timestamp': '2025-09-30 22:38:30.303115', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:30.371382', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.07337357848882675, 'timestamp': '2025-09-30 22:38:30.374903', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:30.437601', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.14800462126731873, 'timestamp': '2025-09-30 22:38:30.443357', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:30.503566', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.10762469470500946, 'timestamp': '2025-09-30 22:38:30.512577', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:30.570375', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.07250994443893433, 'timestamp': '2025-09-30 22:38:30.575160', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:30.642603', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.08612272143363953, 'timestamp': '2025-09-30 22:38:30.646304', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:38:30.716853', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.08287091553211212, 'timestamp': '2025-09-30 22:38:30.734296', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:30.793300', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.13051484525203705, 'timestamp': '2025-09-30 22:38:30.800230', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:30.857339', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.09211984276771545, 'timestamp': '2025-09-30 22:38:30.872357', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:30.946390', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.06008552759885788, 'timestamp': '2025-09-30 22:38:30.951593', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:31.012333', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.05995643511414528, 'timestamp': '2025-09-30 22:38:31.017255', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:31.080197', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.10506108403205872, 'timestamp': '2025-09-30 22:38:31.088342', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:31.145527', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.1585278958082199, 'timestamp': '2025-09-30 22:38:31.148194', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:31.217213', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.05694093927741051, 'timestamp': '2025-09-30 22:38:31.221980', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:31.290073', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.1942095160484314, 'timestamp': '2025-09-30 22:38:31.294545', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:31.352521', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.015856022015213966, 'timestamp': '2025-09-30 22:38:31.361252', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:31.418250', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.043600503355264664, 'timestamp': '2025-09-30 22:38:31.423125', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:31.488649', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.13491412997245789, 'timestamp': '2025-09-30 22:38:31.493726', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:31.568654', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.15028561651706696, 'timestamp': '2025-09-30 22:38:31.572993', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:31.634306', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.06358382105827332, 'timestamp': '2025-09-30 22:38:31.641494', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:31.701935', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.15215516090393066, 'timestamp': '2025-09-30 22:38:31.716452', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:31.775098', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.08648879826068878, 'timestamp': '2025-09-30 22:38:31.779180', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:31.848528', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.07241345942020416, 'timestamp': '2025-09-30 22:38:31.852462', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:31.911167', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.061366744339466095, 'timestamp': '2025-09-30 22:38:31.918934', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:31.976402', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.08314032852649689, 'timestamp': '2025-09-30 22:38:31.979689', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:32.037790', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.07652609795331955, 'timestamp': '2025-09-30 22:38:32.045446', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:32.128080', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.06091742590069771, 'timestamp': '2025-09-30 22:38:32.142793', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:32.210463', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.1724846512079239, 'timestamp': '2025-09-30 22:38:32.217747', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:32.275688', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.03321574255824089, 'timestamp': '2025-09-30 22:38:32.280167', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:32.344516', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.08944475650787354, 'timestamp': '2025-09-30 22:38:32.348593', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:32.422114', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.06779411435127258, 'timestamp': '2025-09-30 22:38:32.440913', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:32.524089', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.04517881199717522, 'timestamp': '2025-09-30 22:38:32.545641', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:32.614955', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.18983499705791473, 'timestamp': '2025-09-30 22:38:32.621328', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:32.696423', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.1584359109401703, 'timestamp': '2025-09-30 22:38:32.716063', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:32.805546', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.08075570315122604, 'timestamp': '2025-09-30 22:38:32.826967', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:32.898749', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.09949533641338348, 'timestamp': '2025-09-30 22:38:32.921726', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:32.981482', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.0910346657037735, 'timestamp': '2025-09-30 22:38:32.986960', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:33.052953', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.08470091968774796, 'timestamp': '2025-09-30 22:38:33.069740', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:38:33.128336', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.1156306266784668, 'timestamp': '2025-09-30 22:38:33.135380', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:33.193227', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.09397347271442413, 'timestamp': '2025-09-30 22:38:33.200409', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:33.271948', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.03550983592867851, 'timestamp': '2025-09-30 22:38:33.276041', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:33.355868', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.1820123791694641, 'timestamp': '2025-09-30 22:38:33.360893', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:33.418077', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.19200317561626434, 'timestamp': '2025-09-30 22:38:33.425779', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:33.484319', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.041114624589681625, 'timestamp': '2025-09-30 22:38:33.491302', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:33.556580', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.04831944778561592, 'timestamp': '2025-09-30 22:38:33.562214', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:33.621187', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.08709488064050674, 'timestamp': '2025-09-30 22:38:33.624385', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:33.693400', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.2439194768667221, 'timestamp': '2025-09-30 22:38:33.710129', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:33.769817', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.07575090229511261, 'timestamp': '2025-09-30 22:38:33.777869', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:33.839696', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.08006636798381805, 'timestamp': '2025-09-30 22:38:33.844886', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:33.904165', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.09475968033075333, 'timestamp': '2025-09-30 22:38:33.907152', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:33.978351', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.12476591020822525, 'timestamp': '2025-09-30 22:38:33.994825', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:34.054445', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.09978415817022324, 'timestamp': '2025-09-30 22:38:34.062897', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:34.120852', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.04889011010527611, 'timestamp': '2025-09-30 22:38:34.126483', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:34.185324', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.08740690350532532, 'timestamp': '2025-09-30 22:38:34.189249', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:34.255557', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.11731608211994171, 'timestamp': '2025-09-30 22:38:34.263902', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:34.342075', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.07051226496696472, 'timestamp': '2025-09-30 22:38:34.349313', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:34.409466', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.13848745822906494, 'timestamp': '2025-09-30 22:38:34.413076', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:34.485175', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.08248534798622131, 'timestamp': '2025-09-30 22:38:34.490114', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:34.556714', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.04408117011189461, 'timestamp': '2025-09-30 22:38:34.561562', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:34.648802', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.10330981761217117, 'timestamp': '2025-09-30 22:38:34.659902', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:34.719908', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.11383545398712158, 'timestamp': '2025-09-30 22:38:34.726896', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:34.786608', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.10685349255800247, 'timestamp': '2025-09-30 22:38:34.791103', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:34.865400', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.10222850739955902, 'timestamp': '2025-09-30 22:38:34.871324', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:34.932403', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.1400434821844101, 'timestamp': '2025-09-30 22:38:34.940500', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:35.012359', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.047546252608299255, 'timestamp': '2025-09-30 22:38:35.030865', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:35.089875', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.12413211166858673, 'timestamp': '2025-09-30 22:38:35.093780', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:35.153251', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.1440904289484024, 'timestamp': '2025-09-30 22:38:35.158178', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:35.218157', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.1468091756105423, 'timestamp': '2025-09-30 22:38:35.226755', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.288197', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.09487935900688171, 'timestamp': '2025-09-30 22:38:35.303172', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:35.361192', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.08621696382761002, 'timestamp': '2025-09-30 22:38:35.365294', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.431925', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.06578591465950012, 'timestamp': '2025-09-30 22:38:35.436935', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.496290', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.04662508890032768, 'timestamp': '2025-09-30 22:38:35.514809', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:35.572080', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.14364977180957794, 'timestamp': '2025-09-30 22:38:35.575588', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:35.634232', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.1109575405716896, 'timestamp': '2025-09-30 22:38:35.646839', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.711275', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.05520505830645561, 'timestamp': '2025-09-30 22:38:35.716026', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.775364', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.06468097120523453, 'timestamp': '2025-09-30 22:38:35.783130', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.850569', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.07509089261293411, 'timestamp': '2025-09-30 22:38:35.854264', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:35.927770', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.09477342665195465, 'timestamp': '2025-09-30 22:38:35.933473', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:35.992482', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.11563577502965927, 'timestamp': '2025-09-30 22:38:35.998544', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:36.073072', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.09998961538076401, 'timestamp': '2025-09-30 22:38:36.080562', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.152590', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.10024743527173996, 'timestamp': '2025-09-30 22:38:36.156321', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.217076', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.10287151485681534, 'timestamp': '2025-09-30 22:38:36.222546', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:36.285464', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.06713790446519852, 'timestamp': '2025-09-30 22:38:36.289271', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.358963', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.046349506825208664, 'timestamp': '2025-09-30 22:38:36.366806', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:36.424665', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.0876770094037056, 'timestamp': '2025-09-30 22:38:36.438873', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.499070', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.06487149745225906, 'timestamp': '2025-09-30 22:38:36.505523', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:36.567193', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.07914010435342789, 'timestamp': '2025-09-30 22:38:36.570401', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:36.644858', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.11029883474111557, 'timestamp': '2025-09-30 22:38:36.652314', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:36.709335', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.08933138102293015, 'timestamp': '2025-09-30 22:38:36.726292', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:36.795465', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.11091069132089615, 'timestamp': '2025-09-30 22:38:36.801528', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.859845', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.09960213303565979, 'timestamp': '2025-09-30 22:38:36.873884', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:36.932192', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.1573912650346756, 'timestamp': '2025-09-30 22:38:36.951505', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:37.008315', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.0808204859495163, 'timestamp': '2025-09-30 22:38:37.012056', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:37.069119', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.06851710379123688, 'timestamp': '2025-09-30 22:38:37.072864', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.131540', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.07271435856819153, 'timestamp': '2025-09-30 22:38:37.134473', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:37.202091', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.10516542941331863, 'timestamp': '2025-09-30 22:38:37.210375', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.268190', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.05728079006075859, 'timestamp': '2025-09-30 22:38:37.272403', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:37.342763', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.09752843528985977, 'timestamp': '2025-09-30 22:38:37.346361', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:37.412850', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.09947384148836136, 'timestamp': '2025-09-30 22:38:37.418046', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:37.487182', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.058575842529535294, 'timestamp': '2025-09-30 22:38:37.495156', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.553242', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.09626202285289764, 'timestamp': '2025-09-30 22:38:37.556205', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:37.618126', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.13971400260925293, 'timestamp': '2025-09-30 22:38:37.622243', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:37.680612', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.1533413678407669, 'timestamp': '2025-09-30 22:38:37.685312', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.745609', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.1632121354341507, 'timestamp': '2025-09-30 22:38:37.753479', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.824640', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.11181756108999252, 'timestamp': '2025-09-30 22:38:37.828463', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.893764', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.13444632291793823, 'timestamp': '2025-09-30 22:38:37.897105', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:37.956177', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.11627136915922165, 'timestamp': '2025-09-30 22:38:37.959228', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:38.016691', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.08837489038705826, 'timestamp': '2025-09-30 22:38:38.022816', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.080913', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.12468282133340836, 'timestamp': '2025-09-30 22:38:38.085149', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:38.146706', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.08798693865537643, 'timestamp': '2025-09-30 22:38:38.149243', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.206834', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.09246042370796204, 'timestamp': '2025-09-30 22:38:38.210330', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:38.277770', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.09089856594800949, 'timestamp': '2025-09-30 22:38:38.285169', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:38.360835', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.112387515604496, 'timestamp': '2025-09-30 22:38:38.368403', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.430614', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.03411990404129028, 'timestamp': '2025-09-30 22:38:38.433493', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.490681', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.08317628502845764, 'timestamp': '2025-09-30 22:38:38.497150', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:38.554114', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.09056883305311203, 'timestamp': '2025-09-30 22:38:38.559974', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.616068', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.07291309535503387, 'timestamp': '2025-09-30 22:38:38.621788', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:38.678233', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.13164612650871277, 'timestamp': '2025-09-30 22:38:38.683497', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:38.745775', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.07326339930295944, 'timestamp': '2025-09-30 22:38:38.748538', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.805772', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.10003194957971573, 'timestamp': '2025-09-30 22:38:38.812312', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.870404', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.0718146339058876, 'timestamp': '2025-09-30 22:38:38.872935', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:38.931246', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.12261372804641724, 'timestamp': '2025-09-30 22:38:38.935377', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:38.993902', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.0661299005150795, 'timestamp': '2025-09-30 22:38:38.996206', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:39.052817', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.13001322746276855, 'timestamp': '2025-09-30 22:38:39.059818', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:39.116587', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.18446506559848785, 'timestamp': '2025-09-30 22:38:39.118919', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:39.176423', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.17751947045326233, 'timestamp': '2025-09-30 22:38:39.179855', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:39.238551', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.11273546516895294, 'timestamp': '2025-09-30 22:38:39.241413', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:39.297968', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.05669967830181122, 'timestamp': '2025-09-30 22:38:39.303883', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:39.364623', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.09832751750946045, 'timestamp': '2025-09-30 22:38:39.367171', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:39.423130', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.0681283175945282, 'timestamp': '2025-09-30 22:38:39.425276', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:39.483341', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.07954976707696915, 'timestamp': '2025-09-30 22:38:39.486065', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:39.543469', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.16409312188625336, 'timestamp': '2025-09-30 22:38:39.550989', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:39.608866', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.14196249842643738, 'timestamp': '2025-09-30 22:38:39.611311', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:39.670558', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.15596655011177063, 'timestamp': '2025-09-30 22:38:39.673154', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:39.733247', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.1206524521112442, 'timestamp': '2025-09-30 22:38:39.737416', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:39.795433', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.12255216389894485, 'timestamp': '2025-09-30 22:38:39.801999', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:39.858626', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.05099662393331528, 'timestamp': '2025-09-30 22:38:39.863558', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:38:39.921681', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.1237335130572319, 'timestamp': '2025-09-30 22:38:39.925346', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:39.982227', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.05341332405805588, 'timestamp': '2025-09-30 22:38:39.985061', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.042379', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.10218581557273865, 'timestamp': '2025-09-30 22:38:40.048550', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:40.104504', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.08804167062044144, 'timestamp': '2025-09-30 22:38:40.107480', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:38:40.164863', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.05360068008303642, 'timestamp': '2025-09-30 22:38:40.167480', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:40.224161', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.05281762033700943, 'timestamp': '2025-09-30 22:38:40.226312', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.285672', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.10371806472539902, 'timestamp': '2025-09-30 22:38:40.291764', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:40.350057', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.08259174972772598, 'timestamp': '2025-09-30 22:38:40.353016', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.409579', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.047632087022066116, 'timestamp': '2025-09-30 22:38:40.412340', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:40.471818', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.13948214054107666, 'timestamp': '2025-09-30 22:38:40.474135', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.530516', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.12233114242553711, 'timestamp': '2025-09-30 22:38:40.537538', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.593036', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.03060930408537388, 'timestamp': '2025-09-30 22:38:40.599227', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.660111', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.12610045075416565, 'timestamp': '2025-09-30 22:38:40.662643', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:40.729122', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.08592301607131958, 'timestamp': '2025-09-30 22:38:40.731843', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:40.788238', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.07170019298791885, 'timestamp': '2025-09-30 22:38:40.794114', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:38:40.850222', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.1500978320837021, 'timestamp': '2025-09-30 22:38:40.852804', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:40.909733', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.0854172334074974, 'timestamp': '2025-09-30 22:38:40.912619', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:40.971755', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.13250988721847534, 'timestamp': '2025-09-30 22:38:40.974049', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:41.031231', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.05381142348051071, 'timestamp': '2025-09-30 22:38:41.037250', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:41.093324', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.07528673857450485, 'timestamp': '2025-09-30 22:38:41.095916', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:41.154774', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.029296010732650757, 'timestamp': '2025-09-30 22:38:41.156951', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:41.227437', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.10607733577489853, 'timestamp': '2025-09-30 22:38:41.229590', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:41.287313', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.13893704116344452, 'timestamp': '2025-09-30 22:38:41.294758', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:41.351867', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.13278718292713165, 'timestamp': '2025-09-30 22:38:41.354649', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:41.413596', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.10536210238933563, 'timestamp': '2025-09-30 22:38:41.418068', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:41.477398', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.05567820742726326, 'timestamp': '2025-09-30 22:38:41.479461', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:41.538280', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.13531921803951263, 'timestamp': '2025-09-30 22:38:41.544435', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:41.601324', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.032249078154563904, 'timestamp': '2025-09-30 22:38:41.603612', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:38:41.666546', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.09889750927686691, 'timestamp': '2025-09-30 22:38:41.668575', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:41.726987', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.14096830785274506, 'timestamp': '2025-09-30 22:38:41.729211', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:41.785230', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.17700348794460297, 'timestamp': '2025-09-30 22:38:41.792481', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:41.851718', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.06069198250770569, 'timestamp': '2025-09-30 22:38:41.854110', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:41.913064', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.07772382348775864, 'timestamp': '2025-09-30 22:38:41.915733', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:41.974827', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.048686571419239044, 'timestamp': '2025-09-30 22:38:41.977338', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.034896', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.04211370646953583, 'timestamp': '2025-09-30 22:38:42.040772', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:42.104357', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.08074986934661865, 'timestamp': '2025-09-30 22:38:42.111400', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.168510', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.07952480763196945, 'timestamp': '2025-09-30 22:38:42.170929', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:42.229396', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.09770147502422333, 'timestamp': '2025-09-30 22:38:42.231757', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:42.288319', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.09884859621524811, 'timestamp': '2025-09-30 22:38:42.296183', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:42.355295', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.1060640886425972, 'timestamp': '2025-09-30 22:38:42.360959', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.421636', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.06515107303857803, 'timestamp': '2025-09-30 22:38:42.425217', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.484371', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.058044545352458954, 'timestamp': '2025-09-30 22:38:42.491812', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:42.548827', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.11599350720643997, 'timestamp': '2025-09-30 22:38:42.554770', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.611062', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.1291498988866806, 'timestamp': '2025-09-30 22:38:42.614408', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:42.676690', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.1064746081829071, 'timestamp': '2025-09-30 22:38:42.683516', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.740716', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.06072152778506279, 'timestamp': '2025-09-30 22:38:42.743373', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:42.802876', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.07899018377065659, 'timestamp': '2025-09-30 22:38:42.809072', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:38:58.313930', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 9905.622228236529, 'timestamp': '2025-09-30 22:38:58.317429', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:58.374667', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.08508950471878052, 'timestamp': '2025-09-30 22:38:58.377932', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:58.455592', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.042541563510894775, 'timestamp': '2025-09-30 22:38:58.458490', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:58.516020', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.0627095103263855, 'timestamp': '2025-09-30 22:38:58.520516', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:58.588935', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.08663402497768402, 'timestamp': '2025-09-30 22:38:58.600379', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:58.660283', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.08256325870752335, 'timestamp': '2025-09-30 22:38:58.662681', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:58.733849', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.13312754034996033, 'timestamp': '2025-09-30 22:38:58.737356', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:58.814829', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.0925564169883728, 'timestamp': '2025-09-30 22:38:58.818837', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:58.880211', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.1626792997121811, 'timestamp': '2025-09-30 22:38:58.890363', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:58.951228', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.09326328337192535, 'timestamp': '2025-09-30 22:38:58.954962', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:59.013882', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.12901361286640167, 'timestamp': '2025-09-30 22:38:59.016288', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.074190', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.08171724528074265, 'timestamp': '2025-09-30 22:38:59.077274', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:59.135959', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.14966826140880585, 'timestamp': '2025-09-30 22:38:59.143461', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.203468', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.08080800622701645, 'timestamp': '2025-09-30 22:38:59.206043', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.265350', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.07839236408472061, 'timestamp': '2025-09-30 22:38:59.271011', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.333573', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.099359892308712, 'timestamp': '2025-09-30 22:38:59.336643', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.401781', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.08408709615468979, 'timestamp': '2025-09-30 22:38:59.410955', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:59.468182', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.1202629953622818, 'timestamp': '2025-09-30 22:38:59.470804', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:38:59.537721', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.041349995881319046, 'timestamp': '2025-09-30 22:38:59.540850', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:59.609593', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.07407092303037643, 'timestamp': '2025-09-30 22:38:59.617281', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.678968', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.053727734833955765, 'timestamp': '2025-09-30 22:38:59.685351', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:38:59.747826', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.10093460232019424, 'timestamp': '2025-09-30 22:38:59.750804', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.809623', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.049801092594861984, 'timestamp': '2025-09-30 22:38:59.811931', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.870629', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.08738500624895096, 'timestamp': '2025-09-30 22:38:59.873643', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:38:59.934452', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.06419271975755692, 'timestamp': '2025-09-30 22:38:59.940424', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:38:59.998312', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.10632631927728653, 'timestamp': '2025-09-30 22:39:00.000794', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.058108', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.028919914737343788, 'timestamp': '2025-09-30 22:39:00.060808', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.120667', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.12158206105232239, 'timestamp': '2025-09-30 22:39:00.123202', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.179893', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.09458816796541214, 'timestamp': '2025-09-30 22:39:00.186143', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.243753', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.06496726721525192, 'timestamp': '2025-09-30 22:39:00.246070', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:00.310205', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.07319559156894684, 'timestamp': '2025-09-30 22:39:00.316863', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:00.381962', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.10589989274740219, 'timestamp': '2025-09-30 22:39:00.384987', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:00.443092', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.12001214921474457, 'timestamp': '2025-09-30 22:39:00.449403', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:00.508604', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.07314154505729675, 'timestamp': '2025-09-30 22:39:00.515796', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.576562', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.1588887721300125, 'timestamp': '2025-09-30 22:39:00.582419', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:00.644000', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.05270078405737877, 'timestamp': '2025-09-30 22:39:00.646453', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.704813', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.05788622424006462, 'timestamp': '2025-09-30 22:39:00.710741', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:00.773316', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.052603382617235184, 'timestamp': '2025-09-30 22:39:00.779091', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:00.845523', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.07619209587574005, 'timestamp': '2025-09-30 22:39:00.851756', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:00.942282', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.09734009206295013, 'timestamp': '2025-09-30 22:39:00.945282', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:01.032448', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.11775582283735275, 'timestamp': '2025-09-30 22:39:01.039074', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:01.120471', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.03630480915307999, 'timestamp': '2025-09-30 22:39:01.127283', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:01.187677', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.041415438055992126, 'timestamp': '2025-09-30 22:39:01.190008', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:01.248574', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.07846280932426453, 'timestamp': '2025-09-30 22:39:01.252048', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:01.310948', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.0790252760052681, 'timestamp': '2025-09-30 22:39:01.321497', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:01.387373', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.11535623669624329, 'timestamp': '2025-09-30 22:39:01.390622', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:01.448266', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.11398404091596603, 'timestamp': '2025-09-30 22:39:01.452419', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:01.526475', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.15778155624866486, 'timestamp': '2025-09-30 22:39:01.528728', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:01.587107', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.18577420711517334, 'timestamp': '2025-09-30 22:39:01.601196', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:01.666706', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.05046255141496658, 'timestamp': '2025-09-30 22:39:01.669189', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:01.727386', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.20555207133293152, 'timestamp': '2025-09-30 22:39:01.729948', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:01.788782', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.13926203548908234, 'timestamp': '2025-09-30 22:39:01.791144', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:01.859826', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.08673427253961563, 'timestamp': '2025-09-30 22:39:01.866440', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:01.936635', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.12427841126918793, 'timestamp': '2025-09-30 22:39:01.940577', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:02.013712', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.10424277931451797, 'timestamp': '2025-09-30 22:39:02.018508', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.088401', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.05659540370106697, 'timestamp': '2025-09-30 22:39:02.096551', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.155951', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.09668822586536407, 'timestamp': '2025-09-30 22:39:02.163382', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.238645', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.06912806630134583, 'timestamp': '2025-09-30 22:39:02.242975', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.313721', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.08230485022068024, 'timestamp': '2025-09-30 22:39:02.316660', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:02.377141', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.1095135360956192, 'timestamp': '2025-09-30 22:39:02.379708', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:02.437803', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.11011528968811035, 'timestamp': '2025-09-30 22:39:02.443623', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:02.505938', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.09985063225030899, 'timestamp': '2025-09-30 22:39:02.508110', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:02.567361', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.12776508927345276, 'timestamp': '2025-09-30 22:39:02.569518', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:02.635684', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.09675650298595428, 'timestamp': '2025-09-30 22:39:02.641900', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.707658', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.06510251760482788, 'timestamp': '2025-09-30 22:39:02.713504', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.774391', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.13718029856681824, 'timestamp': '2025-09-30 22:39:02.776997', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.834483', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.07698941975831985, 'timestamp': '2025-09-30 22:39:02.839936', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:02.910460', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.04390960931777954, 'timestamp': '2025-09-30 22:39:02.913731', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:02.985395', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.15253540873527527, 'timestamp': '2025-09-30 22:39:02.991556', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.049454', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.14703118801116943, 'timestamp': '2025-09-30 22:39:03.052364', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:03.113698', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.16071946918964386, 'timestamp': '2025-09-30 22:39:03.116304', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:03.173972', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.059783145785331726, 'timestamp': '2025-09-30 22:39:03.176991', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.234215', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.10399919748306274, 'timestamp': '2025-09-30 22:39:03.241597', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.303872', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.06746111810207367, 'timestamp': '2025-09-30 22:39:03.310857', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.370791', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.08221247047185898, 'timestamp': '2025-09-30 22:39:03.374344', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:03.432978', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.11023122072219849, 'timestamp': '2025-09-30 22:39:03.436092', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.510732', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.07764878869056702, 'timestamp': '2025-09-30 22:39:03.518208', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:03.576276', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.15482395887374878, 'timestamp': '2025-09-30 22:39:03.586012', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:03.644324', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.09076925367116928, 'timestamp': '2025-09-30 22:39:03.647873', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:03.712826', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.08786014467477798, 'timestamp': '2025-09-30 22:39:03.715982', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:03.777312', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.16803185641765594, 'timestamp': '2025-09-30 22:39:03.783476', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:03.840644', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.050779685378074646, 'timestamp': '2025-09-30 22:39:03.843100', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:03.900105', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.1390884816646576, 'timestamp': '2025-09-30 22:39:03.903537', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:03.988078', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.14202868938446045, 'timestamp': '2025-09-30 22:39:03.995650', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:04.077903', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.11286047101020813, 'timestamp': '2025-09-30 22:39:04.084120', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:04.143516', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.12342747300863266, 'timestamp': '2025-09-30 22:39:04.150813', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:04.214148', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.13434754312038422, 'timestamp': '2025-09-30 22:39:04.217572', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:04.275386', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.06608229130506516, 'timestamp': '2025-09-30 22:39:04.279072', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:04.336759', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.1675005853176117, 'timestamp': '2025-09-30 22:39:04.348810', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:04.411624', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.18923026323318481, 'timestamp': '2025-09-30 22:39:04.413951', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:04.472902', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.0921071320772171, 'timestamp': '2025-09-30 22:39:04.475748', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:04.550014', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.09979511797428131, 'timestamp': '2025-09-30 22:39:04.555019', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:04.613225', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.12465832382440567, 'timestamp': '2025-09-30 22:39:04.621770', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:04.678258', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.2175329625606537, 'timestamp': '2025-09-30 22:39:04.680605', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:04.744204', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.04095333814620972, 'timestamp': '2025-09-30 22:39:04.748660', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:04.809893', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.14223051071166992, 'timestamp': '2025-09-30 22:39:04.816340', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:04.873703', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.06206102669239044, 'timestamp': '2025-09-30 22:39:04.879930', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:04.937518', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.10448627918958664, 'timestamp': '2025-09-30 22:39:04.940447', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:05.007413', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.07337231934070587, 'timestamp': '2025-09-30 22:39:05.015206', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:05.082654', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.09985964000225067, 'timestamp': '2025-09-30 22:39:05.085833', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:05.143102', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.08626669645309448, 'timestamp': '2025-09-30 22:39:05.150194', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:05.212725', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.09724099934101105, 'timestamp': '2025-09-30 22:39:05.214864', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:05.273922', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.04641477391123772, 'timestamp': '2025-09-30 22:39:05.276533', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:05.335771', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.10547222942113876, 'timestamp': '2025-09-30 22:39:05.338801', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:05.396234', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.12499544024467468, 'timestamp': '2025-09-30 22:39:05.404281', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:05.465613', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.06526898592710495, 'timestamp': '2025-09-30 22:39:05.467606', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:05.527813', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.07267569750547409, 'timestamp': '2025-09-30 22:39:05.530003', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:05.588769', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.05614398419857025, 'timestamp': '2025-09-30 22:39:05.592434', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:05.650153', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.06600126624107361, 'timestamp': '2025-09-30 22:39:05.658626', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:05.718785', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.05636515095829964, 'timestamp': '2025-09-30 22:39:05.721950', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:05.785732', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.09479725360870361, 'timestamp': '2025-09-30 22:39:05.788154', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:05.845090', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.19775275886058807, 'timestamp': '2025-09-30 22:39:05.852319', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:05.909874', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.10659889131784439, 'timestamp': '2025-09-30 22:39:05.915758', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:05.979887', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.06601417809724808, 'timestamp': '2025-09-30 22:39:05.981795', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:06.042323', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.12158317118883133, 'timestamp': '2025-09-30 22:39:06.045069', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:06.102578', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.10265549272298813, 'timestamp': '2025-09-30 22:39:06.104870', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:06.168059', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.08124145120382309, 'timestamp': '2025-09-30 22:39:06.174047', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:06.235095', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.08728359639644623, 'timestamp': '2025-09-30 22:39:06.237573', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:06.295433', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.07103599607944489, 'timestamp': '2025-09-30 22:39:06.299670', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:06.358276', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.05715026333928108, 'timestamp': '2025-09-30 22:39:06.360730', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:06.422367', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.06979449838399887, 'timestamp': '2025-09-30 22:39:06.428206', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:06.492257', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.07001384347677231, 'timestamp': '2025-09-30 22:39:06.494424', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:06.553790', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.0883655995130539, 'timestamp': '2025-09-30 22:39:06.556058', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:06.613851', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.13134978711605072, 'timestamp': '2025-09-30 22:39:06.616347', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:06.685265', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.0936058983206749, 'timestamp': '2025-09-30 22:39:06.691595', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:06.748682', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.09222915768623352, 'timestamp': '2025-09-30 22:39:06.753736', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:06.813599', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.18122601509094238, 'timestamp': '2025-09-30 22:39:06.816839', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:06.876422', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.12036591023206711, 'timestamp': '2025-09-30 22:39:06.878426', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:06.942729', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.08212385326623917, 'timestamp': '2025-09-30 22:39:06.948651', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.005570', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.06914913654327393, 'timestamp': '2025-09-30 22:39:07.008946', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:07.077860', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.060176946222782135, 'timestamp': '2025-09-30 22:39:07.080200', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.138301', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.05583924800157547, 'timestamp': '2025-09-30 22:39:07.140989', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.198129', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.08106131106615067, 'timestamp': '2025-09-30 22:39:07.203884', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:07.264098', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.16147670149803162, 'timestamp': '2025-09-30 22:39:07.266188', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:07.329776', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.05051427707076073, 'timestamp': '2025-09-30 22:39:07.331752', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:07.388154', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.09122291207313538, 'timestamp': '2025-09-30 22:39:07.390167', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.447797', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.05810701474547386, 'timestamp': '2025-09-30 22:39:07.453465', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.514588', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.03659794479608536, 'timestamp': '2025-09-30 22:39:07.516969', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:07.574186', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.08138637244701385, 'timestamp': '2025-09-30 22:39:07.576471', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.645129', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.1530420184135437, 'timestamp': '2025-09-30 22:39:07.647397', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:07.705300', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.10045219957828522, 'timestamp': '2025-09-30 22:39:07.712034', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:07.773982', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.1435643434524536, 'timestamp': '2025-09-30 22:39:07.776099', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:07.840676', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.0846734270453453, 'timestamp': '2025-09-30 22:39:07.842911', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:07.908260', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.07852571457624435, 'timestamp': '2025-09-30 22:39:07.910871', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:07.972445', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.06749453395605087, 'timestamp': '2025-09-30 22:39:07.978588', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.036736', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.03799690306186676, 'timestamp': '2025-09-30 22:39:08.039167', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:08.097648', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.1206926479935646, 'timestamp': '2025-09-30 22:39:08.100140', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.158966', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.1266954392194748, 'timestamp': '2025-09-30 22:39:08.160937', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:08.219252', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.14844541251659393, 'timestamp': '2025-09-30 22:39:08.224964', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:08.284334', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.044955018907785416, 'timestamp': '2025-09-30 22:39:08.287606', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.344609', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.06288784742355347, 'timestamp': '2025-09-30 22:39:08.347510', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:08.405313', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.1183217242360115, 'timestamp': '2025-09-30 22:39:08.407666', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:08.467562', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.12137583643198013, 'timestamp': '2025-09-30 22:39:08.473160', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.530835', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.06170576438307762, 'timestamp': '2025-09-30 22:39:08.532856', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.589132', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.1632876694202423, 'timestamp': '2025-09-30 22:39:08.591864', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:08.649801', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.04417640343308449, 'timestamp': '2025-09-30 22:39:08.652236', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.710387', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.07782015204429626, 'timestamp': '2025-09-30 22:39:08.718375', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:08.777888', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.17547306418418884, 'timestamp': '2025-09-30 22:39:08.780254', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:08.838134', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.07091569155454636, 'timestamp': '2025-09-30 22:39:08.842068', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:08.898678', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.060053057968616486, 'timestamp': '2025-09-30 22:39:08.900750', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:08.965303', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.040241941809654236, 'timestamp': '2025-09-30 22:39:08.970915', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:09.040309', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.0545528307557106, 'timestamp': '2025-09-30 22:39:09.042398', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:09.100870', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.14035840332508087, 'timestamp': '2025-09-30 22:39:09.104262', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.162746', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.08071374893188477, 'timestamp': '2025-09-30 22:39:09.165748', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.222989', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.18101394176483154, 'timestamp': '2025-09-30 22:39:09.228695', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.287280', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.1194382905960083, 'timestamp': '2025-09-30 22:39:09.289423', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.370239', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.14679411053657532, 'timestamp': '2025-09-30 22:39:09.372906', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.430939', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.1369529813528061, 'timestamp': '2025-09-30 22:39:09.432978', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:09.491229', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.09250596165657043, 'timestamp': '2025-09-30 22:39:09.496936', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:09.557872', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.14903433620929718, 'timestamp': '2025-09-30 22:39:09.560738', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:09.626962', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.06406119465827942, 'timestamp': '2025-09-30 22:39:09.629755', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.686854', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.06478265672922134, 'timestamp': '2025-09-30 22:39:09.689120', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.748975', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.10601598769426346, 'timestamp': '2025-09-30 22:39:09.755347', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.825539', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.08517871052026749, 'timestamp': '2025-09-30 22:39:09.828259', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:09.895369', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.07398878782987595, 'timestamp': '2025-09-30 22:39:09.898706', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:09.956784', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.01162364799529314, 'timestamp': '2025-09-30 22:39:09.958920', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.027583', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.06860288232564926, 'timestamp': '2025-09-30 22:39:10.034269', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:10.092129', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.20394185185432434, 'timestamp': '2025-09-30 22:39:10.094337', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.153210', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.09417863190174103, 'timestamp': '2025-09-30 22:39:10.155642', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:10.213854', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.07244294881820679, 'timestamp': '2025-09-30 22:39:10.216317', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:10.277547', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.11623477190732956, 'timestamp': '2025-09-30 22:39:10.283381', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:10.340268', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.07245402038097382, 'timestamp': '2025-09-30 22:39:10.342916', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:10.400785', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.1259932518005371, 'timestamp': '2025-09-30 22:39:10.404021', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:10.463264', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.12728911638259888, 'timestamp': '2025-09-30 22:39:10.465732', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:10.526541', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.08872567862272263, 'timestamp': '2025-09-30 22:39:10.532351', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.590110', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.15349102020263672, 'timestamp': '2025-09-30 22:39:10.592444', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.661143', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.04426991567015648, 'timestamp': '2025-09-30 22:39:10.663429', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.734966', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.09347306936979294, 'timestamp': '2025-09-30 22:39:10.737591', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:10.795351', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.039177797734737396, 'timestamp': '2025-09-30 22:39:10.801219', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:10.859333', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.09890854358673096, 'timestamp': '2025-09-30 22:39:10.861866', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.918612', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.04246539622545242, 'timestamp': '2025-09-30 22:39:10.920769', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:10.978216', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.10616988688707352, 'timestamp': '2025-09-30 22:39:10.981909', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.041099', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.10502312332391739, 'timestamp': '2025-09-30 22:39:11.047187', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:11.105651', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.0526701845228672, 'timestamp': '2025-09-30 22:39:11.108137', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.165836', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.08754757046699524, 'timestamp': '2025-09-30 22:39:11.168200', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.227073', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.10516658425331116, 'timestamp': '2025-09-30 22:39:11.229264', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:11.296027', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.08857792615890503, 'timestamp': '2025-09-30 22:39:11.301976', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.358730', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.15798793733119965, 'timestamp': '2025-09-30 22:39:11.360932', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:11.419280', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.05811462923884392, 'timestamp': '2025-09-30 22:39:11.421680', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:11.478069', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.048944469541311264, 'timestamp': '2025-09-30 22:39:11.483975', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:11.541118', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.07489174604415894, 'timestamp': '2025-09-30 22:39:11.547308', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:11.605702', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.07051344960927963, 'timestamp': '2025-09-30 22:39:11.608794', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:11.665366', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.16144613921642303, 'timestamp': '2025-09-30 22:39:11.669693', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:11.735495', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.05272509902715683, 'timestamp': '2025-09-30 22:39:11.737865', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:11.808563', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.07041224837303162, 'timestamp': '2025-09-30 22:39:11.814628', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.872155', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.10285012423992157, 'timestamp': '2025-09-30 22:39:11.874636', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:11.933797', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.05266781896352768, 'timestamp': '2025-09-30 22:39:11.936105', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:12.002164', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.07908618450164795, 'timestamp': '2025-09-30 22:39:12.005752', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:12.064839', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.1329272985458374, 'timestamp': '2025-09-30 22:39:12.071212', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:12.127302', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.11044558882713318, 'timestamp': '2025-09-30 22:39:12.129527', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:12.186400', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.06712911278009415, 'timestamp': '2025-09-30 22:39:12.189329', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:12.246088', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.09578920900821686, 'timestamp': '2025-09-30 22:39:12.248596', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:12.307146', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.07051633298397064, 'timestamp': '2025-09-30 22:39:12.313160', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:12.370732', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.08524800837039948, 'timestamp': '2025-09-30 22:39:12.373049', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:12.431061', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.08196751028299332, 'timestamp': '2025-09-30 22:39:12.433340', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:12.490503', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.13380247354507446, 'timestamp': '2025-09-30 22:39:12.493738', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:12.551702', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.10307912528514862, 'timestamp': '2025-09-30 22:39:12.558109', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:12.615876', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.1144072562456131, 'timestamp': '2025-09-30 22:39:12.619639', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:12.675624', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.07237391918897629, 'timestamp': '2025-09-30 22:39:12.679897', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:12.740300', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.061086494475603104, 'timestamp': '2025-09-30 22:39:12.744096', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:12.805404', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.07482133060693741, 'timestamp': '2025-09-30 22:39:12.812294', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:12.870705', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.11464188992977142, 'timestamp': '2025-09-30 22:39:12.873632', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:12.930783', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.11720924079418182, 'timestamp': '2025-09-30 22:39:12.935281', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:12.993434', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.12377453595399857, 'timestamp': '2025-09-30 22:39:12.995576', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:13.053740', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.11621272563934326, 'timestamp': '2025-09-30 22:39:13.059814', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-09-30 22:39:13.488609', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:13.547964', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.08685597032308578, 'timestamp': '2025-09-30 22:39:13.550935', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:13.616577', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.0828801766037941, 'timestamp': '2025-09-30 22:39:13.620207', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:13.678283', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.1222335621714592, 'timestamp': '2025-09-30 22:39:13.680753', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:13.745299', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.09608006477355957, 'timestamp': '2025-09-30 22:39:13.752345', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:13.811631', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.05691716447472572, 'timestamp': '2025-09-30 22:39:13.814240', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:13.873570', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.029359133914113045, 'timestamp': '2025-09-30 22:39:13.876022', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:13.933149', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.10092474520206451, 'timestamp': '2025-09-30 22:39:13.935651', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:13.993220', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.06766641139984131, 'timestamp': '2025-09-30 22:39:13.999272', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.064644', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.08271852135658264, 'timestamp': '2025-09-30 22:39:14.068596', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.125712', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.022346561774611473, 'timestamp': '2025-09-30 22:39:14.128188', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.185389', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.11702880263328552, 'timestamp': '2025-09-30 22:39:14.189664', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.254812', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.12283992767333984, 'timestamp': '2025-09-30 22:39:14.260777', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:14.334140', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.10861311107873917, 'timestamp': '2025-09-30 22:39:14.336912', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.394466', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.15518316626548767, 'timestamp': '2025-09-30 22:39:14.397108', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.455889', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.0786360427737236, 'timestamp': '2025-09-30 22:39:14.459869', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:14.519202', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.0407598540186882, 'timestamp': '2025-09-30 22:39:14.526690', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.586195', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.1289289891719818, 'timestamp': '2025-09-30 22:39:14.589764', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:14.647382', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.15328289568424225, 'timestamp': '2025-09-30 22:39:14.650238', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:14.709553', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.11838126927614212, 'timestamp': '2025-09-30 22:39:14.712069', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:14.770383', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.13241466879844666, 'timestamp': '2025-09-30 22:39:14.779695', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:14.860955', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.1075345128774643, 'timestamp': '2025-09-30 22:39:14.863400', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:14.919571', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.13173234462738037, 'timestamp': '2025-09-30 22:39:14.923047', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:14.982582', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.07262316346168518, 'timestamp': '2025-09-30 22:39:14.985904', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:15.052579', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.0951794907450676, 'timestamp': '2025-09-30 22:39:15.058700', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:15.117958', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.10185898840427399, 'timestamp': '2025-09-30 22:39:15.121275', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:15.180127', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.16256806254386902, 'timestamp': '2025-09-30 22:39:15.182978', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:15.241120', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.10090426355600357, 'timestamp': '2025-09-30 22:39:15.244051', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:15.303675', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.06977328658103943, 'timestamp': '2025-09-30 22:39:15.311169', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:15.378594', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.10235874354839325, 'timestamp': '2025-09-30 22:39:15.380988', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:15.441337', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.057176921516656876, 'timestamp': '2025-09-30 22:39:15.444029', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:15.505428', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.12159992009401321, 'timestamp': '2025-09-30 22:39:15.507712', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:15.575973', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.07219662517309189, 'timestamp': '2025-09-30 22:39:15.581743', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:15.641311', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.0799756646156311, 'timestamp': '2025-09-30 22:39:15.644132', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:15.707086', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.14347712695598602, 'timestamp': '2025-09-30 22:39:15.709862', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:15.768822', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.11790945380926132, 'timestamp': '2025-09-30 22:39:15.771214', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:15.833241', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.10459689795970917, 'timestamp': '2025-09-30 22:39:15.839104', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:15.896468', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.10834759473800659, 'timestamp': '2025-09-30 22:39:15.898625', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:15.956884', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.09810008108615875, 'timestamp': '2025-09-30 22:39:15.959516', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.022914', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.04568830877542496, 'timestamp': '2025-09-30 22:39:16.026449', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.083309', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.08675379306077957, 'timestamp': '2025-09-30 22:39:16.090456', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.155657', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.06489063799381256, 'timestamp': '2025-09-30 22:39:16.158054', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:16.215449', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.09863987565040588, 'timestamp': '2025-09-30 22:39:16.218047', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.275637', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.0931890606880188, 'timestamp': '2025-09-30 22:39:16.277978', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:16.335498', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.06548355519771576, 'timestamp': '2025-09-30 22:39:16.341961', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:16.422592', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.070401132106781, 'timestamp': '2025-09-30 22:39:16.425154', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:16.488648', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.0884355828166008, 'timestamp': '2025-09-30 22:39:16.492302', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:16.550514', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.07040367275476456, 'timestamp': '2025-09-30 22:39:16.553144', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.617963', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.17976433038711548, 'timestamp': '2025-09-30 22:39:16.624065', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:16.682782', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.09214283525943756, 'timestamp': '2025-09-30 22:39:16.685346', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:16.746200', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.05774746090173721, 'timestamp': '2025-09-30 22:39:16.748969', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:16.807296', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.09330713748931885, 'timestamp': '2025-09-30 22:39:16.809925', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:16.867685', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.12902581691741943, 'timestamp': '2025-09-30 22:39:16.873905', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:16.930885', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.044092029333114624, 'timestamp': '2025-09-30 22:39:16.934381', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:16.992708', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.05141308903694153, 'timestamp': '2025-09-30 22:39:16.997658', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:17.064438', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.20213913917541504, 'timestamp': '2025-09-30 22:39:17.070776', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:17.139767', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.017833923920989037, 'timestamp': '2025-09-30 22:39:17.148794', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:17.213197', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.11623718589544296, 'timestamp': '2025-09-30 22:39:17.227587', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:17.289117', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.18139874935150146, 'timestamp': '2025-09-30 22:39:17.291573', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:17.347541', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.04985389485955238, 'timestamp': '2025-09-30 22:39:17.349925', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:17.414654', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.06627178192138672, 'timestamp': '2025-09-30 22:39:17.420706', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:17.478448', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.09747155010700226, 'timestamp': '2025-09-30 22:39:17.480624', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:17.538211', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.07406515628099442, 'timestamp': '2025-09-30 22:39:17.540869', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:17.597481', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.07818056643009186, 'timestamp': '2025-09-30 22:39:17.601117', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:17.657552', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.09668830037117004, 'timestamp': '2025-09-30 22:39:17.663264', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:17.720204', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.08882837742567062, 'timestamp': '2025-09-30 22:39:17.724468', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:17.793617', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.09606293588876724, 'timestamp': '2025-09-30 22:39:17.796397', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:17.853425', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.06866607815027237, 'timestamp': '2025-09-30 22:39:17.859860', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:17.920080', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.06748536974191666, 'timestamp': '2025-09-30 22:39:17.928656', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:17.987227', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.08333209902048111, 'timestamp': '2025-09-30 22:39:17.991329', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:18.050503', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.0527794174849987, 'timestamp': '2025-09-30 22:39:18.056674', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:18.128188', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.1134880930185318, 'timestamp': '2025-09-30 22:39:18.133395', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:18.194815', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.09377730637788773, 'timestamp': '2025-09-30 22:39:18.202051', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:18.268137', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.056992046535015106, 'timestamp': '2025-09-30 22:39:18.270626', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:18.333801', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.07494925707578659, 'timestamp': '2025-09-30 22:39:18.336478', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:18.397583', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.08234965801239014, 'timestamp': '2025-09-30 22:39:18.399968', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:18.456884', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.12465497106313705, 'timestamp': '2025-09-30 22:39:18.463661', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:18.519556', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.10830417275428772, 'timestamp': '2025-09-30 22:39:18.522746', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:18.582633', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.02792656421661377, 'timestamp': '2025-09-30 22:39:18.590647', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:18.655648', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.14473989605903625, 'timestamp': '2025-09-30 22:39:18.658237', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:18.724640', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.08321692794561386, 'timestamp': '2025-09-30 22:39:18.730732', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:18.789502', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.08166423439979553, 'timestamp': '2025-09-30 22:39:18.792406', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:18.857358', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.18150368332862854, 'timestamp': '2025-09-30 22:39:18.859945', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:18.921542', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.11987865716218948, 'timestamp': '2025-09-30 22:39:18.924085', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:18.991727', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.15566156804561615, 'timestamp': '2025-09-30 22:39:18.998512', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.061942', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.128777414560318, 'timestamp': '2025-09-30 22:39:19.065904', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:19.125256', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.06433723866939545, 'timestamp': '2025-09-30 22:39:19.127721', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:19.185312', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.09990287572145462, 'timestamp': '2025-09-30 22:39:19.187746', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.245169', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.20689329504966736, 'timestamp': '2025-09-30 22:39:19.251046', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.311700', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.13222888112068176, 'timestamp': '2025-09-30 22:39:19.314501', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:19.371976', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.1286422461271286, 'timestamp': '2025-09-30 22:39:19.374194', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:19.433329', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.040995825082063675, 'timestamp': '2025-09-30 22:39:19.436013', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:19.499401', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.17130577564239502, 'timestamp': '2025-09-30 22:39:19.505445', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.563041', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.17142681777477264, 'timestamp': '2025-09-30 22:39:19.565329', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:19.623089', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.1283016800880432, 'timestamp': '2025-09-30 22:39:19.625278', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:19.683333', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.16333599388599396, 'timestamp': '2025-09-30 22:39:19.685270', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:19.743939', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.07399675995111465, 'timestamp': '2025-09-30 22:39:19.749407', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:19.805644', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.11343817412853241, 'timestamp': '2025-09-30 22:39:19.809450', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.867671', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.05877237394452095, 'timestamp': '2025-09-30 22:39:19.870207', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:19.929879', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.07316728681325912, 'timestamp': '2025-09-30 22:39:19.932852', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:19.994247', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.11207679659128189, 'timestamp': '2025-09-30 22:39:20.001416', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:20.060542', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.07161912322044373, 'timestamp': '2025-09-30 22:39:20.064003', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.129828', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.22663573920726776, 'timestamp': '2025-09-30 22:39:20.132380', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.189794', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.04288889467716217, 'timestamp': '2025-09-30 22:39:20.193122', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.251847', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.0901557207107544, 'timestamp': '2025-09-30 22:39:20.262812', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.319659', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.15950730443000793, 'timestamp': '2025-09-30 22:39:20.321915', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.379059', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.12007564306259155, 'timestamp': '2025-09-30 22:39:20.382980', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.440435', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.051270853728055954, 'timestamp': '2025-09-30 22:39:20.443055', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.506332', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.19578342139720917, 'timestamp': '2025-09-30 22:39:20.521587', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:20.578666', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.03748828172683716, 'timestamp': '2025-09-30 22:39:20.581385', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:20.652623', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.08673333376646042, 'timestamp': '2025-09-30 22:39:20.654575', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:20.716909', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.049833014607429504, 'timestamp': '2025-09-30 22:39:20.719138', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.778068', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.08718241751194, 'timestamp': '2025-09-30 22:39:20.785810', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.845602', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.09704536199569702, 'timestamp': '2025-09-30 22:39:20.848368', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.905419', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.07942107319831848, 'timestamp': '2025-09-30 22:39:20.908161', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:20.983844', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.1097673624753952, 'timestamp': '2025-09-30 22:39:20.986689', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:21.044228', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.09717176109552383, 'timestamp': '2025-09-30 22:39:21.050892', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:21.106687', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.06941927224397659, 'timestamp': '2025-09-30 22:39:21.109030', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:21.168110', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.045884039252996445, 'timestamp': '2025-09-30 22:39:21.170356', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:21.228414', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.11430177837610245, 'timestamp': '2025-09-30 22:39:21.231203', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:21.290218', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.0699550211429596, 'timestamp': '2025-09-30 22:39:21.296652', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:21.359682', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.049935393035411835, 'timestamp': '2025-09-30 22:39:21.362392', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:39:21.420307', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.0768340528011322, 'timestamp': '2025-09-30 22:39:21.422686', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:21.481547', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.04337354004383087, 'timestamp': '2025-09-30 22:39:21.484868', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:21.540777', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.05472807213664055, 'timestamp': '2025-09-30 22:39:21.547503', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:21.612883', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.09675828367471695, 'timestamp': '2025-09-30 22:39:21.615041', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:21.674789', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.13971564173698425, 'timestamp': '2025-09-30 22:39:21.679754', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:21.738827', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.11784154176712036, 'timestamp': '2025-09-30 22:39:21.741170', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:21.798433', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.09287283569574356, 'timestamp': '2025-09-30 22:39:21.804623', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:21.861883', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.08441122621297836, 'timestamp': '2025-09-30 22:39:21.864275', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:21.932757', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.154155433177948, 'timestamp': '2025-09-30 22:39:21.935341', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:22.007517', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.18943649530410767, 'timestamp': '2025-09-30 22:39:22.012905', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:22.078225', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.15681742131710052, 'timestamp': '2025-09-30 22:39:22.087195', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:22.165505', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.05049503222107887, 'timestamp': '2025-09-30 22:39:22.167899', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:22.225213', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.14004826545715332, 'timestamp': '2025-09-30 22:39:22.227862', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:22.296404', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.10190801322460175, 'timestamp': '2025-09-30 22:39:22.299086', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:22.357408', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.1527792066335678, 'timestamp': '2025-09-30 22:39:22.363440', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:22.435084', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.03672618418931961, 'timestamp': '2025-09-30 22:39:22.437901', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:22.507207', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.1622573882341385, 'timestamp': '2025-09-30 22:39:22.509073', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:22.577234', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.09283232688903809, 'timestamp': '2025-09-30 22:39:22.579250', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:39:22.636531', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.08504216372966766, 'timestamp': '2025-09-30 22:39:22.645831', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:22.713740', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.055164165794849396, 'timestamp': '2025-09-30 22:39:22.723748', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:22.783816', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.07773181051015854, 'timestamp': '2025-09-30 22:39:22.786061', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:22.843370', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.12691763043403625, 'timestamp': '2025-09-30 22:39:22.846472', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:22.905401', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.12415818870067596, 'timestamp': '2025-09-30 22:39:22.911122', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:22.968943', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.08070449531078339, 'timestamp': '2025-09-30 22:39:22.971545', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.035618', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.1539960354566574, 'timestamp': '2025-09-30 22:39:23.038301', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.105720', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.09657707810401917, 'timestamp': '2025-09-30 22:39:23.108260', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:23.175806', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.08671770244836807, 'timestamp': '2025-09-30 22:39:23.181850', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:23.238773', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.07679495960474014, 'timestamp': '2025-09-30 22:39:23.241761', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:23.301585', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.07012677937746048, 'timestamp': '2025-09-30 22:39:23.303964', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.363411', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.08271721005439758, 'timestamp': '2025-09-30 22:39:23.366219', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:23.435686', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.1113932877779007, 'timestamp': '2025-09-30 22:39:23.442558', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.498507', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.10899920016527176, 'timestamp': '2025-09-30 22:39:23.500974', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:23.559187', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.10548358410596848, 'timestamp': '2025-09-30 22:39:23.561429', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:23.618806', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.10024740546941757, 'timestamp': '2025-09-30 22:39:23.621784', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:23.678819', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.09081621468067169, 'timestamp': '2025-09-30 22:39:23.685336', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.741372', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.13986043632030487, 'timestamp': '2025-09-30 22:39:23.744349', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:23.803720', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.09557145833969116, 'timestamp': '2025-09-30 22:39:23.805626', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:23.873339', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.07296419143676758, 'timestamp': '2025-09-30 22:39:23.875912', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:23.947051', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.0503469854593277, 'timestamp': '2025-09-30 22:39:23.952750', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:24.009333', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.13495159149169922, 'timestamp': '2025-09-30 22:39:24.012520', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:24.069626', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.10684226453304291, 'timestamp': '2025-09-30 22:39:24.073117', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:24.130451', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.17750775814056396, 'timestamp': '2025-09-30 22:39:24.134188', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:24.192768', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.03852385655045509, 'timestamp': '2025-09-30 22:39:24.199812', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:24.257232', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.15570007264614105, 'timestamp': '2025-09-30 22:39:24.259918', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:24.322282', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.11591985821723938, 'timestamp': '2025-09-30 22:39:24.324845', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:24.384822', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.09835168719291687, 'timestamp': '2025-09-30 22:39:24.387501', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:24.445115', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.06390281021595001, 'timestamp': '2025-09-30 22:39:24.450887', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:24.507787', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.05117319896817207, 'timestamp': '2025-09-30 22:39:24.510630', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:24.567528', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.13523836433887482, 'timestamp': '2025-09-30 22:39:24.570044', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:24.628378', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.05762314796447754, 'timestamp': '2025-09-30 22:39:24.630876', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:24.689059', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.11489289253950119, 'timestamp': '2025-09-30 22:39:24.695608', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:24.751837', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.06530977040529251, 'timestamp': '2025-09-30 22:39:24.754735', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:24.812845', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.09759954363107681, 'timestamp': '2025-09-30 22:39:24.816038', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:39:24.873723', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.10838005691766739, 'timestamp': '2025-09-30 22:39:24.876133', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:24.935460', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.05141172558069229, 'timestamp': '2025-09-30 22:39:24.941691', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.000339', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.1735372394323349, 'timestamp': '2025-09-30 22:39:25.004349', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:25.061497', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.030861468985676765, 'timestamp': '2025-09-30 22:39:25.065110', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:25.123078', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.17203402519226074, 'timestamp': '2025-09-30 22:39:25.125483', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:25.183858', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.05142917111515999, 'timestamp': '2025-09-30 22:39:25.189591', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.246683', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.09825710952281952, 'timestamp': '2025-09-30 22:39:25.249203', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:25.306312', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.14480160176753998, 'timestamp': '2025-09-30 22:39:25.308492', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:25.367243', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.07652252912521362, 'timestamp': '2025-09-30 22:39:25.369757', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:25.426728', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.06351295113563538, 'timestamp': '2025-09-30 22:39:25.432738', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:25.502763', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.09240001440048218, 'timestamp': '2025-09-30 22:39:25.506413', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.563990', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.08528148382902145, 'timestamp': '2025-09-30 22:39:25.569439', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:25.628591', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.10544819384813309, 'timestamp': '2025-09-30 22:39:25.630888', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.688891', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.03401748463511467, 'timestamp': '2025-09-30 22:39:25.695758', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.752319', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.14978189766407013, 'timestamp': '2025-09-30 22:39:25.756145', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:25.813633', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.06944763660430908, 'timestamp': '2025-09-30 22:39:25.816764', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.874422', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.09421012550592422, 'timestamp': '2025-09-30 22:39:25.876924', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:25.939472', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.08856161683797836, 'timestamp': '2025-09-30 22:39:25.952362', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:26.010047', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.08328559994697571, 'timestamp': '2025-09-30 22:39:26.012803', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:26.078294', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.050831642001867294, 'timestamp': '2025-09-30 22:39:26.080713', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.138336', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.09233936667442322, 'timestamp': '2025-09-30 22:39:26.145071', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.205747', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.14670896530151367, 'timestamp': '2025-09-30 22:39:26.212931', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.276365', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.07726491987705231, 'timestamp': '2025-09-30 22:39:26.278935', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.336886', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.1119329184293747, 'timestamp': '2025-09-30 22:39:26.339070', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.395510', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.12145330011844635, 'timestamp': '2025-09-30 22:39:26.398215', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:26.487058', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.1590048372745514, 'timestamp': '2025-09-30 22:39:26.493072', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.561306', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.16309429705142975, 'timestamp': '2025-09-30 22:39:26.565116', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.623089', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.09239541739225388, 'timestamp': '2025-09-30 22:39:26.625402', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:26.684665', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.14754600822925568, 'timestamp': '2025-09-30 22:39:26.686924', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:26.744577', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.08963332325220108, 'timestamp': '2025-09-30 22:39:26.750473', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:26.807166', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.05572521314024925, 'timestamp': '2025-09-30 22:39:26.809442', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:26.866573', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.05584363639354706, 'timestamp': '2025-09-30 22:39:26.870201', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:26.926896', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.04420273005962372, 'timestamp': '2025-09-30 22:39:26.929656', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:26.986705', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.11421338468790054, 'timestamp': '2025-09-30 22:39:26.992712', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:27.048791', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.12431138008832932, 'timestamp': '2025-09-30 22:39:27.051021', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:27.108085', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.0819777399301529, 'timestamp': '2025-09-30 22:39:27.110639', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:27.178117', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.1480632722377777, 'timestamp': '2025-09-30 22:39:27.180638', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:27.237950', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.11383617669343948, 'timestamp': '2025-09-30 22:39:27.243651', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:27.300649', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.09772704541683197, 'timestamp': '2025-09-30 22:39:27.304113', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:27.363255', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.1057526096701622, 'timestamp': '2025-09-30 22:39:27.367291', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:27.425309', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.1628103405237198, 'timestamp': '2025-09-30 22:39:27.427617', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:27.486639', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.05748145282268524, 'timestamp': '2025-09-30 22:39:27.497813', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:27.566863', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.10359635204076767, 'timestamp': '2025-09-30 22:39:27.569448', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:27.633220', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.041892584413290024, 'timestamp': '2025-09-30 22:39:27.637520', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:27.699691', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.08240876346826553, 'timestamp': '2025-09-30 22:39:27.702927', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:27.762610', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.11247269809246063, 'timestamp': '2025-09-30 22:39:27.768881', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:27.830336', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.06029330939054489, 'timestamp': '2025-09-30 22:39:27.833368', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:27.891956', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.044157665222883224, 'timestamp': '2025-09-30 22:39:27.894194', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:27.952868', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.13472387194633484, 'timestamp': '2025-09-30 22:39:27.955414', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:28.012374', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.11390212923288345, 'timestamp': '2025-09-30 22:39:28.018282', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:28.075473', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.13342246413230896, 'timestamp': '2025-09-30 22:39:28.078365', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:28.137541', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.06745347380638123, 'timestamp': '2025-09-30 22:39:28.140034', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:28.198317', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.05469997599720955, 'timestamp': '2025-09-30 22:39:28.200631', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:28.258136', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.12874670326709747, 'timestamp': '2025-09-30 22:39:28.263909', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:28.321437', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.1431557983160019, 'timestamp': '2025-09-30 22:39:28.323826', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:28.389296', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.14294575154781342, 'timestamp': '2025-09-30 22:39:28.391767', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:28.451191', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.03568818420171738, 'timestamp': '2025-09-30 22:39:28.453729', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:28.515345', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.04803924635052681, 'timestamp': '2025-09-30 22:39:28.521109', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:28.578321', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.06078127771615982, 'timestamp': '2025-09-30 22:39:28.580674', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:28.640174', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.07907141745090485, 'timestamp': '2025-09-30 22:39:28.642749', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:28.701274', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.06911428272724152, 'timestamp': '2025-09-30 22:39:28.714758', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:28.774579', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.06657347083091736, 'timestamp': '2025-09-30 22:39:28.782077', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:28.838838', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.03900960460305214, 'timestamp': '2025-09-30 22:39:28.841642', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:28.898614', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.07141587883234024, 'timestamp': '2025-09-30 22:39:28.903660', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:28.961235', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.10080112516880035, 'timestamp': '2025-09-30 22:39:28.964633', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:39:29.023280', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.02235049568116665, 'timestamp': '2025-09-30 22:39:29.029519', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:29.104886', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.12995801866054535, 'timestamp': '2025-09-30 22:39:29.108300', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:29.167857', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.07980542629957199, 'timestamp': '2025-09-30 22:39:29.170441', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:39:44.671518', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 8483.85555227187, 'timestamp': '2025-09-30 22:39:44.674748', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:44.732647', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.09184020012617111, 'timestamp': '2025-09-30 22:39:44.735195', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:44.793423', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.13105951249599457, 'timestamp': '2025-09-30 22:39:44.799982', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:44.856799', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.15085910260677338, 'timestamp': '2025-09-30 22:39:44.859295', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:44.915322', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.04710223525762558, 'timestamp': '2025-09-30 22:39:44.917321', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:44.973390', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.0979679673910141, 'timestamp': '2025-09-30 22:39:44.975646', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:45.032808', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.06884469091892242, 'timestamp': '2025-09-30 22:39:45.039544', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:45.106537', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.09717191010713577, 'timestamp': '2025-09-30 22:39:45.108903', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:45.166365', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.03367845341563225, 'timestamp': '2025-09-30 22:39:45.168757', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:45.225063', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.055902570486068726, 'timestamp': '2025-09-30 22:39:45.227236', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:45.283111', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.16337013244628906, 'timestamp': '2025-09-30 22:39:45.289106', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:45.344544', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.06360101699829102, 'timestamp': '2025-09-30 22:39:45.347136', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:45.404000', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.1225607767701149, 'timestamp': '2025-09-30 22:39:45.406457', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:45.473558', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.09378577023744583, 'timestamp': '2025-09-30 22:39:45.475950', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:45.533398', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.1271318942308426, 'timestamp': '2025-09-30 22:39:45.539286', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:45.595888', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.062762551009655, 'timestamp': '2025-09-30 22:39:45.598291', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:45.655915', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.0568092055618763, 'timestamp': '2025-09-30 22:39:45.658924', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:45.715775', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.04322044551372528, 'timestamp': '2025-09-30 22:39:45.718293', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:45.775658', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.13408872485160828, 'timestamp': '2025-09-30 22:39:45.782027', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:45.848792', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.07419678568840027, 'timestamp': '2025-09-30 22:39:45.851388', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:45.910091', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.10475987941026688, 'timestamp': '2025-09-30 22:39:45.912932', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:45.969569', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.1446225345134735, 'timestamp': '2025-09-30 22:39:45.972640', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:46.030730', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.0850832462310791, 'timestamp': '2025-09-30 22:39:46.038197', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.097016', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.12478792667388916, 'timestamp': '2025-09-30 22:39:46.100775', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:46.158593', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.09027531743049622, 'timestamp': '2025-09-30 22:39:46.160753', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.225145', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.04691432788968086, 'timestamp': '2025-09-30 22:39:46.230180', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.288134', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.06476146727800369, 'timestamp': '2025-09-30 22:39:46.293945', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:46.350502', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.1443108469247818, 'timestamp': '2025-09-30 22:39:46.353465', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:46.410079', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.05901537090539932, 'timestamp': '2025-09-30 22:39:46.413110', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:46.470654', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.1400618553161621, 'timestamp': '2025-09-30 22:39:46.474460', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:46.532200', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.054584819823503494, 'timestamp': '2025-09-30 22:39:46.546357', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:46.604588', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.05297139286994934, 'timestamp': '2025-09-30 22:39:46.607117', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:46.664670', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.031983230262994766, 'timestamp': '2025-09-30 22:39:46.668053', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.725658', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.08325748890638351, 'timestamp': '2025-09-30 22:39:46.728511', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.786169', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.058773189783096313, 'timestamp': '2025-09-30 22:39:46.792917', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:46.853604', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.08365900069475174, 'timestamp': '2025-09-30 22:39:46.856472', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:46.929055', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.058367177844047546, 'timestamp': '2025-09-30 22:39:46.932323', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:46.990922', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.08969133347272873, 'timestamp': '2025-09-30 22:39:46.993780', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.051814', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.12954604625701904, 'timestamp': '2025-09-30 22:39:47.057861', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:47.115481', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.07813628017902374, 'timestamp': '2025-09-30 22:39:47.118946', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:47.176132', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.1321113407611847, 'timestamp': '2025-09-30 22:39:47.178658', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:47.237151', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.1229584738612175, 'timestamp': '2025-09-30 22:39:47.240429', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:39:47.300266', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.2058851569890976, 'timestamp': '2025-09-30 22:39:47.307494', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.364286', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.11923956125974655, 'timestamp': '2025-09-30 22:39:47.370018', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:47.426148', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.12200425565242767, 'timestamp': '2025-09-30 22:39:47.428883', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:47.499277', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.14976119995117188, 'timestamp': '2025-09-30 22:39:47.502026', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:47.558598', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.04553995281457901, 'timestamp': '2025-09-30 22:39:47.564708', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.623632', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.14642265439033508, 'timestamp': '2025-09-30 22:39:47.625948', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:47.682900', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.05449419468641281, 'timestamp': '2025-09-30 22:39:47.685202', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.741507', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.051586929708719254, 'timestamp': '2025-09-30 22:39:47.743571', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.800047', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.05491616949439049, 'timestamp': '2025-09-30 22:39:47.805865', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:47.865573', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.10933725535869598, 'timestamp': '2025-09-30 22:39:47.874547', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:47.931004', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.11975744366645813, 'timestamp': '2025-09-30 22:39:47.933545', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:47.991645', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.06048757955431938, 'timestamp': '2025-09-30 22:39:47.994054', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:48.052265', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.14106620848178864, 'timestamp': '2025-09-30 22:39:48.058606', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:48.117529', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.09164533764123917, 'timestamp': '2025-09-30 22:39:48.120088', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:48.178008', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.06296011805534363, 'timestamp': '2025-09-30 22:39:48.180385', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:48.238534', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.06140240281820297, 'timestamp': '2025-09-30 22:39:48.242122', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:48.300504', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.05590622127056122, 'timestamp': '2025-09-30 22:39:48.306678', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:48.363074', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.08250206708908081, 'timestamp': '2025-09-30 22:39:48.365848', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:48.422849', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.12519018352031708, 'timestamp': '2025-09-30 22:39:48.429173', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:48.496324', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.16166074573993683, 'timestamp': '2025-09-30 22:39:48.498537', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:48.559786', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.16521969437599182, 'timestamp': '2025-09-30 22:39:48.565506', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:48.622847', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.08186645060777664, 'timestamp': '2025-09-30 22:39:48.625071', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:48.682831', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.11389180272817612, 'timestamp': '2025-09-30 22:39:48.685346', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:48.742106', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.10608364641666412, 'timestamp': '2025-09-30 22:39:48.744282', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:48.823638', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.041748691350221634, 'timestamp': '2025-09-30 22:39:48.829470', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:48.888432', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.12104500085115433, 'timestamp': '2025-09-30 22:39:48.891934', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:48.950653', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.12796717882156372, 'timestamp': '2025-09-30 22:39:48.953345', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:49.010376', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.15722285211086273, 'timestamp': '2025-09-30 22:39:49.013357', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.071349', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.15442584455013275, 'timestamp': '2025-09-30 22:39:49.077437', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.133708', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.15366636216640472, 'timestamp': '2025-09-30 22:39:49.136134', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.193482', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.08297501504421234, 'timestamp': '2025-09-30 22:39:49.195555', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.252738', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.1323644518852234, 'timestamp': '2025-09-30 22:39:49.255304', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.312280', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.10683614760637283, 'timestamp': '2025-09-30 22:39:49.318410', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.374965', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.040687959641218185, 'timestamp': '2025-09-30 22:39:49.377270', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:49.432997', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.06837789714336395, 'timestamp': '2025-09-30 22:39:49.435241', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.492705', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.15350234508514404, 'timestamp': '2025-09-30 22:39:49.495374', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:49.561944', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.055848609656095505, 'timestamp': '2025-09-30 22:39:49.567844', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.637541', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.11360683292150497, 'timestamp': '2025-09-30 22:39:49.639812', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.697400', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.052115168422460556, 'timestamp': '2025-09-30 22:39:49.699840', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.758165', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.10193128883838654, 'timestamp': '2025-09-30 22:39:49.760783', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.817517', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.06846068799495697, 'timestamp': '2025-09-30 22:39:49.823329', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:49.880889', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.04613935947418213, 'timestamp': '2025-09-30 22:39:49.882850', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:49.947167', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.062131285667419434, 'timestamp': '2025-09-30 22:39:49.949645', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:50.005996', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.06508105993270874, 'timestamp': '2025-09-30 22:39:50.008491', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:50.066470', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.04959772899746895, 'timestamp': '2025-09-30 22:39:50.072268', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:50.128963', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.16012685000896454, 'timestamp': '2025-09-30 22:39:50.131327', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:50.191923', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.09289110451936722, 'timestamp': '2025-09-30 22:39:50.194478', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:50.260886', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.17744776606559753, 'timestamp': '2025-09-30 22:39:50.263079', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:50.319889', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.10703931003808975, 'timestamp': '2025-09-30 22:39:50.331193', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:50.388832', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.08524683117866516, 'timestamp': '2025-09-30 22:39:50.392176', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:50.449809', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.0827832743525505, 'timestamp': '2025-09-30 22:39:50.452101', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:50.511531', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.06921414285898209, 'timestamp': '2025-09-30 22:39:50.514754', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:50.581570', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.159090057015419, 'timestamp': '2025-09-30 22:39:50.587680', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:50.644226', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.11062370240688324, 'timestamp': '2025-09-30 22:39:50.646549', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:50.704775', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.17573294043540955, 'timestamp': '2025-09-30 22:39:50.708005', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:50.765136', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.08361946046352386, 'timestamp': '2025-09-30 22:39:50.767740', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:50.831088', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.04684209078550339, 'timestamp': '2025-09-30 22:39:50.837013', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:50.906929', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.12474275380373001, 'timestamp': '2025-09-30 22:39:50.911519', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:50.976587', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.09397480636835098, 'timestamp': '2025-09-30 22:39:50.980787', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:51.049844', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.12019640952348709, 'timestamp': '2025-09-30 22:39:51.054248', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:51.110911', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.04173118621110916, 'timestamp': '2025-09-30 22:39:51.118338', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.177513', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.14817456901073456, 'timestamp': '2025-09-30 22:39:51.182199', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.242028', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.0897228792309761, 'timestamp': '2025-09-30 22:39:51.244786', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.328463', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.049507636576890945, 'timestamp': '2025-09-30 22:39:51.332683', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:51.398978', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.17549437284469604, 'timestamp': '2025-09-30 22:39:51.405455', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.465631', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.06437164545059204, 'timestamp': '2025-09-30 22:39:51.468467', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:51.535205', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.06030430272221565, 'timestamp': '2025-09-30 22:39:51.537911', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.596579', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.1376487910747528, 'timestamp': '2025-09-30 22:39:51.599009', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.656073', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.07467668503522873, 'timestamp': '2025-09-30 22:39:51.667852', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.724646', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.10157538205385208, 'timestamp': '2025-09-30 22:39:51.726953', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:51.783254', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.05902206525206566, 'timestamp': '2025-09-30 22:39:51.785913', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.846894', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.16989995539188385, 'timestamp': '2025-09-30 22:39:51.849757', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.907760', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.07592479139566422, 'timestamp': '2025-09-30 22:39:51.915551', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:51.971289', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.07113020867109299, 'timestamp': '2025-09-30 22:39:51.974146', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:52.031922', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.10340213775634766, 'timestamp': '2025-09-30 22:39:52.035850', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.095402', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.07539516687393188, 'timestamp': '2025-09-30 22:39:52.098747', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.168292', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.10952694714069366, 'timestamp': '2025-09-30 22:39:52.177427', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:52.246407', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.1251746118068695, 'timestamp': '2025-09-30 22:39:52.248767', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:52.308918', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.11849802732467651, 'timestamp': '2025-09-30 22:39:52.312543', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:52.369331', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.08066275715827942, 'timestamp': '2025-09-30 22:39:52.372084', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.441347', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.15608792006969452, 'timestamp': '2025-09-30 22:39:52.448209', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:52.505603', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.005995563697069883, 'timestamp': '2025-09-30 22:39:52.508183', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.567251', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.10143685340881348, 'timestamp': '2025-09-30 22:39:52.572499', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:52.634602', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.07621322572231293, 'timestamp': '2025-09-30 22:39:52.637477', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:52.698200', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.1420566439628601, 'timestamp': '2025-09-30 22:39:52.704215', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.764286', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.0852019265294075, 'timestamp': '2025-09-30 22:39:52.767416', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:52.825144', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.08316437155008316, 'timestamp': '2025-09-30 22:39:52.827665', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:52.885712', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.03756386414170265, 'timestamp': '2025-09-30 22:39:52.888303', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:52.947951', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.14788119494915009, 'timestamp': '2025-09-30 22:39:52.953952', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.013025', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.11204328387975693, 'timestamp': '2025-09-30 22:39:53.019495', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:53.083710', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.028630521148443222, 'timestamp': '2025-09-30 22:39:53.094022', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.154508', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.0961657240986824, 'timestamp': '2025-09-30 22:39:53.157147', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:53.214721', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.11034903675317764, 'timestamp': '2025-09-30 22:39:53.221670', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.279540', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.04092935100197792, 'timestamp': '2025-09-30 22:39:53.288003', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.350317', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.07208269089460373, 'timestamp': '2025-09-30 22:39:53.354620', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.429375', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.07001453638076782, 'timestamp': '2025-09-30 22:39:53.432666', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.492123', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.048151880502700806, 'timestamp': '2025-09-30 22:39:53.504079', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.565464', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.09909426420927048, 'timestamp': '2025-09-30 22:39:53.577524', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:53.636463', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.07485305517911911, 'timestamp': '2025-09-30 22:39:53.639855', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:53.701439', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.1363864690065384, 'timestamp': '2025-09-30 22:39:53.710513', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:53.779355', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.0764557421207428, 'timestamp': '2025-09-30 22:39:53.785686', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:53.843826', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.0959542766213417, 'timestamp': '2025-09-30 22:39:53.847301', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:53.905003', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.062042996287345886, 'timestamp': '2025-09-30 22:39:53.908023', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:53.971378', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.045048899948596954, 'timestamp': '2025-09-30 22:39:53.977807', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:54.037948', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.17839254438877106, 'timestamp': '2025-09-30 22:39:54.046591', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:54.104382', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.05834732949733734, 'timestamp': '2025-09-30 22:39:54.113871', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:54.174460', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.24101148545742035, 'timestamp': '2025-09-30 22:39:54.177160', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:54.235560', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.05868414416909218, 'timestamp': '2025-09-30 22:39:54.238383', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:54.297764', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.0879313051700592, 'timestamp': '2025-09-30 22:39:54.304222', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:54.368709', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.06540914624929428, 'timestamp': '2025-09-30 22:39:54.379345', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:54.439129', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.13209941983222961, 'timestamp': '2025-09-30 22:39:54.445007', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:54.506479', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.06100733205676079, 'timestamp': '2025-09-30 22:39:54.510243', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:54.580161', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.09858101606369019, 'timestamp': '2025-09-30 22:39:54.586276', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:54.643433', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.13565295934677124, 'timestamp': '2025-09-30 22:39:54.646235', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:54.703801', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.05567959323525429, 'timestamp': '2025-09-30 22:39:54.707377', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:54.765053', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.07566992938518524, 'timestamp': '2025-09-30 22:39:54.768508', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:54.853911', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.0566914938390255, 'timestamp': '2025-09-30 22:39:54.860410', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:54.919653', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.14552736282348633, 'timestamp': '2025-09-30 22:39:54.924153', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:39:54.982311', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.074417844414711, 'timestamp': '2025-09-30 22:39:54.985501', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:55.043628', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.11655756831169128, 'timestamp': '2025-09-30 22:39:55.047640', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:55.106266', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.08851706236600876, 'timestamp': '2025-09-30 22:39:55.113179', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:55.182960', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.14861062169075012, 'timestamp': '2025-09-30 22:39:55.188006', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:55.246817', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.1118583157658577, 'timestamp': '2025-09-30 22:39:55.250556', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:55.310858', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.06805124133825302, 'timestamp': '2025-09-30 22:39:55.314251', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:55.373370', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.0787491723895073, 'timestamp': '2025-09-30 22:39:55.385057', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:55.443949', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.08604668080806732, 'timestamp': '2025-09-30 22:39:55.447186', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:55.507124', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.09518633782863617, 'timestamp': '2025-09-30 22:39:55.512765', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:55.573311', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.12932375073432922, 'timestamp': '2025-09-30 22:39:55.577935', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:55.659378', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.10313793271780014, 'timestamp': '2025-09-30 22:39:55.673059', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:55.729551', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.03358161076903343, 'timestamp': '2025-09-30 22:39:55.737383', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:55.796725', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.18907450139522552, 'timestamp': '2025-09-30 22:39:55.800272', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:55.867500', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.13602235913276672, 'timestamp': '2025-09-30 22:39:55.877874', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:55.943967', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.09070781618356705, 'timestamp': '2025-09-30 22:39:55.958521', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.044058', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.09835775196552277, 'timestamp': '2025-09-30 22:39:56.056726', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:39:56.153956', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.09584753960371017, 'timestamp': '2025-09-30 22:39:56.158210', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.222550', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.055687449872493744, 'timestamp': '2025-09-30 22:39:56.227408', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:56.291910', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.11203952878713608, 'timestamp': '2025-09-30 22:39:56.298677', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.361030', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.11260916292667389, 'timestamp': '2025-09-30 22:39:56.371557', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.443862', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.12424976378679276, 'timestamp': '2025-09-30 22:39:56.451748', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.510298', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.08906616270542145, 'timestamp': '2025-09-30 22:39:56.512873', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:56.572512', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.05510678142309189, 'timestamp': '2025-09-30 22:39:56.578895', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:56.638999', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.14482663571834564, 'timestamp': '2025-09-30 22:39:56.641580', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.704229', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.05255138874053955, 'timestamp': '2025-09-30 22:39:56.707592', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:56.764744', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.03584132343530655, 'timestamp': '2025-09-30 22:39:56.768001', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:56.839339', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.17077915370464325, 'timestamp': '2025-09-30 22:39:56.846101', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:56.903045', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.04687199369072914, 'timestamp': '2025-09-30 22:39:56.906332', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:56.965291', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.0651312842965126, 'timestamp': '2025-09-30 22:39:56.980118', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:57.068636', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.13979840278625488, 'timestamp': '2025-09-30 22:39:57.077668', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:57.138779', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.0639277845621109, 'timestamp': '2025-09-30 22:39:57.146223', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.202700', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.17318011820316315, 'timestamp': '2025-09-30 22:39:57.205888', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.263754', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.08906751871109009, 'timestamp': '2025-09-30 22:39:57.272530', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:57.330875', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.113864965736866, 'timestamp': '2025-09-30 22:39:57.333738', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:57.400539', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.0981256440281868, 'timestamp': '2025-09-30 22:39:57.407276', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.463564', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.10858211666345596, 'timestamp': '2025-09-30 22:39:57.467191', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.525277', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.04260161146521568, 'timestamp': '2025-09-30 22:39:57.534536', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:57.601372', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.10488171130418777, 'timestamp': '2025-09-30 22:39:57.605173', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.662847', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.08154758810997009, 'timestamp': '2025-09-30 22:39:57.669801', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:57.737818', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.12852975726127625, 'timestamp': '2025-09-30 22:39:57.745054', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:57.809184', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.059316761791706085, 'timestamp': '2025-09-30 22:39:57.813305', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:57.872566', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.04663264751434326, 'timestamp': '2025-09-30 22:39:57.875318', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:57.938473', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.04935518652200699, 'timestamp': '2025-09-30 22:39:57.947739', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:58.005017', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.08167056739330292, 'timestamp': '2025-09-30 22:39:58.008749', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.071770', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.07264985144138336, 'timestamp': '2025-09-30 22:39:58.084721', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:58.143225', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.09358903020620346, 'timestamp': '2025-09-30 22:39:58.147746', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:58.210317', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.10632861405611038, 'timestamp': '2025-09-30 22:39:58.217119', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:58.274458', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.11490877717733383, 'timestamp': '2025-09-30 22:39:58.277566', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.337584', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.14011900126934052, 'timestamp': '2025-09-30 22:39:58.341075', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:58.400866', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.12590038776397705, 'timestamp': '2025-09-30 22:39:58.408883', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:58.471450', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.11882642656564713, 'timestamp': '2025-09-30 22:39:58.477665', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:58.546088', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.09256315231323242, 'timestamp': '2025-09-30 22:39:58.548844', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:58.631230', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.08553123474121094, 'timestamp': '2025-09-30 22:39:58.637244', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.699365', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.1337822526693344, 'timestamp': '2025-09-30 22:39:58.702359', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.773632', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.1121298223733902, 'timestamp': '2025-09-30 22:39:58.780912', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.848810', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.03660526126623154, 'timestamp': '2025-09-30 22:39:58.854456', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:58.922630', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.069816455245018, 'timestamp': '2025-09-30 22:39:58.928019', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:58.997810', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.08221849054098129, 'timestamp': '2025-09-30 22:39:59.003571', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:59.063177', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.0719928964972496, 'timestamp': '2025-09-30 22:39:59.071056', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:59.129953', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.12274643778800964, 'timestamp': '2025-09-30 22:39:59.145558', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:59.210689', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.047924719750881195, 'timestamp': '2025-09-30 22:39:59.213266', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:59.277174', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.04618813097476959, 'timestamp': '2025-09-30 22:39:59.282662', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:59.355364', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.12139968574047089, 'timestamp': '2025-09-30 22:39:59.361282', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:59.417264', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.10617846250534058, 'timestamp': '2025-09-30 22:39:59.419850', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:59.479560', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.06671135872602463, 'timestamp': '2025-09-30 22:39:59.486610', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:59.544893', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.10546177625656128, 'timestamp': '2025-09-30 22:39:59.547935', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:59.620647', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.17307722568511963, 'timestamp': '2025-09-30 22:39:59.629350', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:39:59.688288', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.05819641053676605, 'timestamp': '2025-09-30 22:39:59.695910', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:39:59.760313', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.05130593106150627, 'timestamp': '2025-09-30 22:39:59.766247', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:59.829817', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.12820830941200256, 'timestamp': '2025-09-30 22:39:59.836879', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:39:59.895704', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.042990490794181824, 'timestamp': '2025-09-30 22:39:59.911676', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:39:59.974058', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.11529720574617386, 'timestamp': '2025-09-30 22:39:59.978770', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:00.036292', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.1125902533531189, 'timestamp': '2025-09-30 22:40:00.042764', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:00.117589', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.07662495970726013, 'timestamp': '2025-09-30 22:40:00.120282', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:00.178020', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.06944239139556885, 'timestamp': '2025-09-30 22:40:00.184930', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:00.251705', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.04985158145427704, 'timestamp': '2025-09-30 22:40:00.266965', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:00.340793', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.09871972352266312, 'timestamp': '2025-09-30 22:40:00.343464', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:00.400990', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.0416996031999588, 'timestamp': '2025-09-30 22:40:00.407732', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:00.474197', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.08199496567249298, 'timestamp': '2025-09-30 22:40:00.481760', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:00.552986', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.055341459810733795, 'timestamp': '2025-09-30 22:40:00.562391', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:00.623676', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.023525292053818703, 'timestamp': '2025-09-30 22:40:00.637347', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:00.701434', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.07507830858230591, 'timestamp': '2025-09-30 22:40:00.710173', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:00.775810', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.01339639164507389, 'timestamp': '2025-09-30 22:40:00.786238', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:00.861666', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.08118350803852081, 'timestamp': '2025-09-30 22:40:00.864603', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:00.922823', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.057167232036590576, 'timestamp': '2025-09-30 22:40:00.935257', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:01.011014', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.0595717690885067, 'timestamp': '2025-09-30 22:40:01.014268', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:01.083359', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.12054059654474258, 'timestamp': '2025-09-30 22:40:01.097632', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:01.160592', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.1086365282535553, 'timestamp': '2025-09-30 22:40:01.165045', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:01.222828', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.08396747708320618, 'timestamp': '2025-09-30 22:40:01.227236', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:01.303151', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.13247857987880707, 'timestamp': '2025-09-30 22:40:01.307062', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:01.371756', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.0517154224216938, 'timestamp': '2025-09-30 22:40:01.381126', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:01.443406', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.07259385287761688, 'timestamp': '2025-09-30 22:40:01.446461', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:01.503360', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.07121656090021133, 'timestamp': '2025-09-30 22:40:01.512937', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:01.571475', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.06733841449022293, 'timestamp': '2025-09-30 22:40:01.575081', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:01.634687', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.05562884733080864, 'timestamp': '2025-09-30 22:40:01.641743', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:01.713270', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.03750082105398178, 'timestamp': '2025-09-30 22:40:01.716464', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:01.773647', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.10339812934398651, 'timestamp': '2025-09-30 22:40:01.777775', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:01.843523', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.12245521694421768, 'timestamp': '2025-09-30 22:40:01.854912', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:01.930608', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.039600271731615067, 'timestamp': '2025-09-30 22:40:01.942146', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-09-30 22:40:02.595043', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:02.669019', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.09001780301332474, 'timestamp': '2025-09-30 22:40:02.672414', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:02.738369', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.1291726678609848, 'timestamp': '2025-09-30 22:40:02.751021', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:02.812482', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.11895723640918732, 'timestamp': '2025-09-30 22:40:02.816254', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:02.880736', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.08994364738464355, 'timestamp': '2025-09-30 22:40:02.900673', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:02.971137', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.1187279000878334, 'timestamp': '2025-09-30 22:40:02.974644', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:03.035587', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.09657363593578339, 'timestamp': '2025-09-30 22:40:03.049082', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.111056', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.10907791554927826, 'timestamp': '2025-09-30 22:40:03.124853', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.182028', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.11316019296646118, 'timestamp': '2025-09-30 22:40:03.189286', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.255842', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.0771275982260704, 'timestamp': '2025-09-30 22:40:03.258558', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:03.318991', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.048009179532527924, 'timestamp': '2025-09-30 22:40:03.323583', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:03.385343', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.1371973156929016, 'timestamp': '2025-09-30 22:40:03.388232', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.447123', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.03918585926294327, 'timestamp': '2025-09-30 22:40:03.453978', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:03.510440', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.12645000219345093, 'timestamp': '2025-09-30 22:40:03.513280', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.582766', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.06088331714272499, 'timestamp': '2025-09-30 22:40:03.585407', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.651391', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.03861301392316818, 'timestamp': '2025-09-30 22:40:03.654686', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.714028', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.08376352488994598, 'timestamp': '2025-09-30 22:40:03.726096', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.783198', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.020541515201330185, 'timestamp': '2025-09-30 22:40:03.790243', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:03.852234', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.0677313357591629, 'timestamp': '2025-09-30 22:40:03.857319', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:03.919872', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.08541315793991089, 'timestamp': '2025-09-30 22:40:03.923000', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:03.979861', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.08785071969032288, 'timestamp': '2025-09-30 22:40:03.991415', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:04.056455', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.07550262659788132, 'timestamp': '2025-09-30 22:40:04.072036', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.139010', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.04214484989643097, 'timestamp': '2025-09-30 22:40:04.142434', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:04.200063', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.051970478147268295, 'timestamp': '2025-09-30 22:40:04.204804', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.264307', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.04780731350183487, 'timestamp': '2025-09-30 22:40:04.271121', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:04.330423', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.04919466748833656, 'timestamp': '2025-09-30 22:40:04.340895', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:04.398596', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.11688784509897232, 'timestamp': '2025-09-30 22:40:04.411556', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:04.469543', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.052001070231199265, 'timestamp': '2025-09-30 22:40:04.472742', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:04.530197', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.04934313893318176, 'timestamp': '2025-09-30 22:40:04.543179', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.615083', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.12930403649806976, 'timestamp': '2025-09-30 22:40:04.618455', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:04.677159', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.14241686463356018, 'timestamp': '2025-09-30 22:40:04.680741', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.742583', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.048981066793203354, 'timestamp': '2025-09-30 22:40:04.746616', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.804581', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.0504622720181942, 'timestamp': '2025-09-30 22:40:04.812407', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:04.870792', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.05832511559128761, 'timestamp': '2025-09-30 22:40:04.875564', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:04.943802', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.09252749383449554, 'timestamp': '2025-09-30 22:40:04.946898', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.005143', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.04106736555695534, 'timestamp': '2025-09-30 22:40:05.009067', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.065865', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.08684200048446655, 'timestamp': '2025-09-30 22:40:05.074852', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:05.136853', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.16833843290805817, 'timestamp': '2025-09-30 22:40:05.139637', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:05.208043', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.0377374067902565, 'timestamp': '2025-09-30 22:40:05.211679', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.268399', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.1510278433561325, 'timestamp': '2025-09-30 22:40:05.271108', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:05.330067', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.0609612800180912, 'timestamp': '2025-09-30 22:40:05.336736', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.398905', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.052398230880498886, 'timestamp': '2025-09-30 22:40:05.404009', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:05.462597', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.03589851036667824, 'timestamp': '2025-09-30 22:40:05.465463', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.545289', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.1008245050907135, 'timestamp': '2025-09-30 22:40:05.550141', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:05.625771', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.13149644434452057, 'timestamp': '2025-09-30 22:40:05.634543', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:05.694140', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.07295019924640656, 'timestamp': '2025-09-30 22:40:05.698304', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:05.764431', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.0868389904499054, 'timestamp': '2025-09-30 22:40:05.767624', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:05.831631', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.11062652617692947, 'timestamp': '2025-09-30 22:40:05.835571', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:05.892977', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.0493246354162693, 'timestamp': '2025-09-30 22:40:05.902071', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:05.969726', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.061391621828079224, 'timestamp': '2025-09-30 22:40:05.973195', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:06.039847', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.04512885585427284, 'timestamp': '2025-09-30 22:40:06.043227', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:06.102103', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.09000194072723389, 'timestamp': '2025-09-30 22:40:06.107644', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:06.176596', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.06599600613117218, 'timestamp': '2025-09-30 22:40:06.183028', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:06.241525', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.06121189147233963, 'timestamp': '2025-09-30 22:40:06.244557', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:06.312594', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.07905341684818268, 'timestamp': '2025-09-30 22:40:06.316227', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:06.376716', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.07722357660531998, 'timestamp': '2025-09-30 22:40:06.381849', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:06.450129', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.09008309245109558, 'timestamp': '2025-09-30 22:40:06.456567', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:06.525389', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.07596040517091751, 'timestamp': '2025-09-30 22:40:06.527891', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:06.584931', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.036704204976558685, 'timestamp': '2025-09-30 22:40:06.597116', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:06.656027', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.11608558893203735, 'timestamp': '2025-09-30 22:40:06.661056', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:06.724649', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.1029219776391983, 'timestamp': '2025-09-30 22:40:06.737484', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:06.796184', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.12426691502332687, 'timestamp': '2025-09-30 22:40:06.808568', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:06.865500', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.11850722134113312, 'timestamp': '2025-09-30 22:40:06.877323', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:06.946599', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.0752255767583847, 'timestamp': '2025-09-30 22:40:06.958722', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:07.018865', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.039421744644641876, 'timestamp': '2025-09-30 22:40:07.025914', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:07.088142', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.07811930775642395, 'timestamp': '2025-09-30 22:40:07.097864', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:07.155873', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.09365396201610565, 'timestamp': '2025-09-30 22:40:07.165089', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:07.225256', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.07843578606843948, 'timestamp': '2025-09-30 22:40:07.230765', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:07.289593', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.04121273383498192, 'timestamp': '2025-09-30 22:40:07.295979', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:07.358899', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.08676863461732864, 'timestamp': '2025-09-30 22:40:07.366068', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:07.425563', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.1082945317029953, 'timestamp': '2025-09-30 22:40:07.427886', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:07.483944', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.09798697382211685, 'timestamp': '2025-09-30 22:40:07.491905', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:07.557815', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.0370810367166996, 'timestamp': '2025-09-30 22:40:07.564555', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:07.622770', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.0812705010175705, 'timestamp': '2025-09-30 22:40:07.625771', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:07.685603', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.011731231585144997, 'timestamp': '2025-09-30 22:40:07.694980', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:07.760055', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.05833233892917633, 'timestamp': '2025-09-30 22:40:07.763384', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:07.821068', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.08752112090587616, 'timestamp': '2025-09-30 22:40:07.827110', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:07.895877', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.05080685019493103, 'timestamp': '2025-09-30 22:40:07.898234', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:07.956007', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.04150007665157318, 'timestamp': '2025-09-30 22:40:07.963306', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.022407', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.025357231497764587, 'timestamp': '2025-09-30 22:40:08.026112', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:08.091958', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.17657476663589478, 'timestamp': '2025-09-30 22:40:08.099677', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:08.162545', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.15421085059642792, 'timestamp': '2025-09-30 22:40:08.165745', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.225201', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.10194312781095505, 'timestamp': '2025-09-30 22:40:08.229268', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:08.290544', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.11462327837944031, 'timestamp': '2025-09-30 22:40:08.293332', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.352223', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.1485443115234375, 'timestamp': '2025-09-30 22:40:08.364888', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.425444', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.11384491622447968, 'timestamp': '2025-09-30 22:40:08.431543', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.504630', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.06391202658414841, 'timestamp': '2025-09-30 22:40:08.507881', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:08.574791', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.11199364066123962, 'timestamp': '2025-09-30 22:40:08.578727', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:08.637545', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.10022628307342529, 'timestamp': '2025-09-30 22:40:08.645354', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:08.708510', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.12613970041275024, 'timestamp': '2025-09-30 22:40:08.713067', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:08.778269', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.09598677605390549, 'timestamp': '2025-09-30 22:40:08.788846', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:08.857625', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.08421852439641953, 'timestamp': '2025-09-30 22:40:08.862128', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:08.922735', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.057950712740421295, 'timestamp': '2025-09-30 22:40:08.929480', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:08.985988', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.07392856478691101, 'timestamp': '2025-09-30 22:40:08.993562', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:09.055955', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.06900623440742493, 'timestamp': '2025-09-30 22:40:09.058998', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:09.117026', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.07579969614744186, 'timestamp': '2025-09-30 22:40:09.120095', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:40:09.177996', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.07416152954101562, 'timestamp': '2025-09-30 22:40:09.185070', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:09.246740', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.09616834670305252, 'timestamp': '2025-09-30 22:40:09.257533', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:09.315482', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.10614390671253204, 'timestamp': '2025-09-30 22:40:09.318978', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:09.378534', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.041521165519952774, 'timestamp': '2025-09-30 22:40:09.382755', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:09.444197', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.11149761080741882, 'timestamp': '2025-09-30 22:40:09.451074', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:09.530132', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.04461297020316124, 'timestamp': '2025-09-30 22:40:09.536302', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:09.596896', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.0659426674246788, 'timestamp': '2025-09-30 22:40:09.613382', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:09.685662', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.13412286341190338, 'timestamp': '2025-09-30 22:40:09.702914', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:09.778141', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.06941522657871246, 'timestamp': '2025-09-30 22:40:09.785262', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:09.851139', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.040139444172382355, 'timestamp': '2025-09-30 22:40:09.860873', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:09.936771', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.049622002989053726, 'timestamp': '2025-09-30 22:40:09.944148', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:10.019897', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.04109277203679085, 'timestamp': '2025-09-30 22:40:10.035341', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:10.115951', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.07242599874734879, 'timestamp': '2025-09-30 22:40:10.128504', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:10.208582', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.07840085029602051, 'timestamp': '2025-09-30 22:40:10.212341', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:10.274876', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.09087829291820526, 'timestamp': '2025-09-30 22:40:10.279572', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:10.350614', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.10834541916847229, 'timestamp': '2025-09-30 22:40:10.357105', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:10.416388', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.16581609845161438, 'timestamp': '2025-09-30 22:40:10.424091', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:10.499004', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.03861949220299721, 'timestamp': '2025-09-30 22:40:10.510349', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:10.580639', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.08058089762926102, 'timestamp': '2025-09-30 22:40:10.584434', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:10.657330', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.12119905650615692, 'timestamp': '2025-09-30 22:40:10.662565', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:10.723681', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.08194676786661148, 'timestamp': '2025-09-30 22:40:10.731937', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:10.798825', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.06790726631879807, 'timestamp': '2025-09-30 22:40:10.802508', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:10.865474', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.09431919455528259, 'timestamp': '2025-09-30 22:40:10.871232', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:10.942389', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.12441353499889374, 'timestamp': '2025-09-30 22:40:10.954509', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:11.025195', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.06308635324239731, 'timestamp': '2025-09-30 22:40:11.039149', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:11.097642', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.01677052117884159, 'timestamp': '2025-09-30 22:40:11.100894', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:11.173056', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.055094826966524124, 'timestamp': '2025-09-30 22:40:11.180225', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:11.245529', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.16655774414539337, 'timestamp': '2025-09-30 22:40:11.259255', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:11.331632', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.09100156277418137, 'timestamp': '2025-09-30 22:40:11.340034', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:11.402890', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.14077267050743103, 'timestamp': '2025-09-30 22:40:11.412819', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:11.493952', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.09868085384368896, 'timestamp': '2025-09-30 22:40:11.497841', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:11.584288', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.14100535213947296, 'timestamp': '2025-09-30 22:40:11.595327', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:11.656345', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.08457449078559875, 'timestamp': '2025-09-30 22:40:11.663204', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:11.742592', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.09610455483198166, 'timestamp': '2025-09-30 22:40:11.745443', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:11.816044', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.16647209227085114, 'timestamp': '2025-09-30 22:40:11.819856', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:11.889798', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.1254105567932129, 'timestamp': '2025-09-30 22:40:11.894731', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:11.959270', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.1306317299604416, 'timestamp': '2025-09-30 22:40:11.972573', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:12.033853', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.16134881973266602, 'timestamp': '2025-09-30 22:40:12.036932', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:12.108366', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.058684613555669785, 'timestamp': '2025-09-30 22:40:12.115989', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:12.177456', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.12209974974393845, 'timestamp': '2025-09-30 22:40:12.182262', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:12.261211', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.0798247680068016, 'timestamp': '2025-09-30 22:40:12.268851', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:12.332107', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.09896796196699142, 'timestamp': '2025-09-30 22:40:12.338332', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:12.400424', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.11878486722707748, 'timestamp': '2025-09-30 22:40:12.403247', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:12.472075', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.04803473502397537, 'timestamp': '2025-09-30 22:40:12.474738', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:12.549152', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.18182243406772614, 'timestamp': '2025-09-30 22:40:12.556217', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:12.616636', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.13703873753547668, 'timestamp': '2025-09-30 22:40:12.623543', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:12.689219', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.11255159974098206, 'timestamp': '2025-09-30 22:40:12.692374', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:12.753979', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.11830238997936249, 'timestamp': '2025-09-30 22:40:12.756830', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:12.821302', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.120919369161129, 'timestamp': '2025-09-30 22:40:12.828297', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:12.897213', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.13584783673286438, 'timestamp': '2025-09-30 22:40:12.903071', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:12.970892', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.06154954060912132, 'timestamp': '2025-09-30 22:40:12.974295', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:13.052938', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.12006890773773193, 'timestamp': '2025-09-30 22:40:13.055443', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.116532', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.08124306052923203, 'timestamp': '2025-09-30 22:40:13.122853', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:13.184875', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.05982539802789688, 'timestamp': '2025-09-30 22:40:13.201938', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.275811', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.061275407671928406, 'timestamp': '2025-09-30 22:40:13.291086', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.360284', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.19869482517242432, 'timestamp': '2025-09-30 22:40:13.363429', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.441065', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.13005399703979492, 'timestamp': '2025-09-30 22:40:13.448379', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:13.504471', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.07890380918979645, 'timestamp': '2025-09-30 22:40:13.511529', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:13.586035', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.10560019314289093, 'timestamp': '2025-09-30 22:40:13.589874', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.655304', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.18345819413661957, 'timestamp': '2025-09-30 22:40:13.657556', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:13.717122', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.12191847711801529, 'timestamp': '2025-09-30 22:40:13.722956', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:13.780106', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.051536817103624344, 'timestamp': '2025-09-30 22:40:13.782464', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:13.850275', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.13708887994289398, 'timestamp': '2025-09-30 22:40:13.853214', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:13.911945', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.05380910262465477, 'timestamp': '2025-09-30 22:40:13.921155', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:13.987107', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.11084171384572983, 'timestamp': '2025-09-30 22:40:14.001080', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:14.069324', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.11814402788877487, 'timestamp': '2025-09-30 22:40:14.072284', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:14.140722', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.042285967618227005, 'timestamp': '2025-09-30 22:40:14.144351', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:14.206412', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.07159586995840073, 'timestamp': '2025-09-30 22:40:14.213069', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:14.273318', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.07858750224113464, 'timestamp': '2025-09-30 22:40:14.279821', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:14.339408', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.12962475419044495, 'timestamp': '2025-09-30 22:40:14.342275', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:14.401978', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.0756165161728859, 'timestamp': '2025-09-30 22:40:14.404907', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:14.474580', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.04661150276660919, 'timestamp': '2025-09-30 22:40:14.477743', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:14.547415', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.07902751117944717, 'timestamp': '2025-09-30 22:40:14.554198', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:40:14.615868', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.09916818141937256, 'timestamp': '2025-09-30 22:40:14.621731', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:14.710258', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.07373864203691483, 'timestamp': '2025-09-30 22:40:14.713990', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:14.774813', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.04535694420337677, 'timestamp': '2025-09-30 22:40:14.781784', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:14.841196', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.06400340795516968, 'timestamp': '2025-09-30 22:40:14.848564', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:14.913579', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.079575315117836, 'timestamp': '2025-09-30 22:40:14.919266', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:14.984590', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.19467027485370636, 'timestamp': '2025-09-30 22:40:14.994178', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:15.071452', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.13083459436893463, 'timestamp': '2025-09-30 22:40:15.075094', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:15.134407', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.07098323851823807, 'timestamp': '2025-09-30 22:40:15.145496', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:15.214151', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.09483223408460617, 'timestamp': '2025-09-30 22:40:15.217843', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:15.274737', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.14612489938735962, 'timestamp': '2025-09-30 22:40:15.278089', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:15.345185', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.08357279002666473, 'timestamp': '2025-09-30 22:40:15.352426', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:15.425698', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.04742427170276642, 'timestamp': '2025-09-30 22:40:15.442194', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:15.499336', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.1523803174495697, 'timestamp': '2025-09-30 22:40:15.504280', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:15.562198', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.06821615993976593, 'timestamp': '2025-09-30 22:40:15.577318', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:15.647642', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.05786463990807533, 'timestamp': '2025-09-30 22:40:15.665293', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:15.725691', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.11511503159999847, 'timestamp': '2025-09-30 22:40:15.733790', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:15.791342', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.0981040745973587, 'timestamp': '2025-09-30 22:40:15.795197', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:15.860985', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.1346128135919571, 'timestamp': '2025-09-30 22:40:15.864680', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:15.922792', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.13221454620361328, 'timestamp': '2025-09-30 22:40:15.934279', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.006129', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.042384061962366104, 'timestamp': '2025-09-30 22:40:16.013409', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.076284', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.11192470043897629, 'timestamp': '2025-09-30 22:40:16.079210', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:16.138915', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.14343269169330597, 'timestamp': '2025-09-30 22:40:16.141757', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.204856', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.04825102537870407, 'timestamp': '2025-09-30 22:40:16.211962', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.270105', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.1817169338464737, 'timestamp': '2025-09-30 22:40:16.284018', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:16.342352', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.03507214039564133, 'timestamp': '2025-09-30 22:40:16.345378', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:16.414579', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.15425723791122437, 'timestamp': '2025-09-30 22:40:16.417307', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:16.477295', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.10645837336778641, 'timestamp': '2025-09-30 22:40:16.480481', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:16.538890', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.06320233643054962, 'timestamp': '2025-09-30 22:40:16.546613', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:16.609112', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.09923648089170456, 'timestamp': '2025-09-30 22:40:16.615499', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.682441', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.12604743242263794, 'timestamp': '2025-09-30 22:40:16.694935', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.761190', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.06932857632637024, 'timestamp': '2025-09-30 22:40:16.774492', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:16.834385', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.16027872264385223, 'timestamp': '2025-09-30 22:40:16.848848', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:16.910874', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.06898436695337296, 'timestamp': '2025-09-30 22:40:16.914469', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:16.986895', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.13064666092395782, 'timestamp': '2025-09-30 22:40:16.991926', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:17.053497', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.11461002379655838, 'timestamp': '2025-09-30 22:40:17.058696', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:17.117714', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.1046219915151596, 'timestamp': '2025-09-30 22:40:17.124377', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:17.188733', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.027498388662934303, 'timestamp': '2025-09-30 22:40:17.193149', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:17.253149', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.1774854212999344, 'timestamp': '2025-09-30 22:40:17.256348', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:17.313887', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.15125010907649994, 'timestamp': '2025-09-30 22:40:17.324434', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:17.390019', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.08136922866106033, 'timestamp': '2025-09-30 22:40:17.396573', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:40:32.879106', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 9579.921929553866, 'timestamp': '2025-09-30 22:40:32.882866', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:32.951793', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.07204531878232956, 'timestamp': '2025-09-30 22:40:32.960467', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:33.019116', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.12379955500364304, 'timestamp': '2025-09-30 22:40:33.022176', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:33.089275', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.07789245992898941, 'timestamp': '2025-09-30 22:40:33.094017', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:33.154887', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.24765053391456604, 'timestamp': '2025-09-30 22:40:33.171060', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:33.228618', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.0973547101020813, 'timestamp': '2025-09-30 22:40:33.235872', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:33.298896', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.11820987612009048, 'timestamp': '2025-09-30 22:40:33.307749', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:33.385299', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.10030964761972427, 'timestamp': '2025-09-30 22:40:33.388373', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:33.445316', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.07289866358041763, 'timestamp': '2025-09-30 22:40:33.452518', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:33.514087', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.06799064576625824, 'timestamp': '2025-09-30 22:40:33.524570', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:33.582795', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.11878972500562668, 'timestamp': '2025-09-30 22:40:33.586741', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:33.644575', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.01801549643278122, 'timestamp': '2025-09-30 22:40:33.648165', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:33.705282', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.12193267047405243, 'timestamp': '2025-09-30 22:40:33.712811', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:33.771358', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.036893852055072784, 'timestamp': '2025-09-30 22:40:33.776450', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:33.835441', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.11934427917003632, 'timestamp': '2025-09-30 22:40:33.838623', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:33.908938', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.042081475257873535, 'timestamp': '2025-09-30 22:40:33.915192', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:33.974680', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.1099713072180748, 'timestamp': '2025-09-30 22:40:33.982746', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:34.040649', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.1255132108926773, 'timestamp': '2025-09-30 22:40:34.045028', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:34.103104', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.06234806403517723, 'timestamp': '2025-09-30 22:40:34.106862', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:34.261289', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.16813130676746368, 'timestamp': '2025-09-30 22:40:34.267803', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:34.349976', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.11713090538978577, 'timestamp': '2025-09-30 22:40:34.356213', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:34.435697', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.16069425642490387, 'timestamp': '2025-09-30 22:40:34.444451', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:34.551785', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.11470261216163635, 'timestamp': '2025-09-30 22:40:34.554570', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:34.647849', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.05777560919523239, 'timestamp': '2025-09-30 22:40:34.650437', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:34.745251', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.030226079747080803, 'timestamp': '2025-09-30 22:40:34.755939', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:34.873464', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.09521842002868652, 'timestamp': '2025-09-30 22:40:34.884449', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.013527', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.0638374388217926, 'timestamp': '2025-09-30 22:40:35.015846', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.095926', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.09698521345853806, 'timestamp': '2025-09-30 22:40:35.098461', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:35.187159', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.12985269725322723, 'timestamp': '2025-09-30 22:40:35.193562', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:35.258318', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.06631063669919968, 'timestamp': '2025-09-30 22:40:35.262188', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.358924', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.084906667470932, 'timestamp': '2025-09-30 22:40:35.365780', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:35.459305', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.22866947948932648, 'timestamp': '2025-09-30 22:40:35.467314', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:35.545312', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.13559794425964355, 'timestamp': '2025-09-30 22:40:35.551745', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.632528', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.14553123712539673, 'timestamp': '2025-09-30 22:40:35.642006', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.727668', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.17709137499332428, 'timestamp': '2025-09-30 22:40:35.732089', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:35.804626', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.05766087397933006, 'timestamp': '2025-09-30 22:40:35.808094', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:35.865779', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.05547057092189789, 'timestamp': '2025-09-30 22:40:35.884096', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:35.960742', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.11796723306179047, 'timestamp': '2025-09-30 22:40:35.969581', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:36.034015', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.10139080882072449, 'timestamp': '2025-09-30 22:40:36.037226', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.114372', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.0767519623041153, 'timestamp': '2025-09-30 22:40:36.119466', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:36.179823', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.060100946575403214, 'timestamp': '2025-09-30 22:40:36.187495', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.245837', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.034140586853027344, 'timestamp': '2025-09-30 22:40:36.248849', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.306019', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.13946786522865295, 'timestamp': '2025-09-30 22:40:36.318843', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.384648', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.1097235158085823, 'timestamp': '2025-09-30 22:40:36.389519', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.455112', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.07947827130556107, 'timestamp': '2025-09-30 22:40:36.463547', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.520681', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.051177337765693665, 'timestamp': '2025-09-30 22:40:36.534501', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:36.600345', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.0732603445649147, 'timestamp': '2025-09-30 22:40:36.603778', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:36.673623', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.05101733282208443, 'timestamp': '2025-09-30 22:40:36.677673', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:36.741016', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.10578270256519318, 'timestamp': '2025-09-30 22:40:36.747794', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:36.825128', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.1252986043691635, 'timestamp': '2025-09-30 22:40:36.828581', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:36.892400', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.10073823481798172, 'timestamp': '2025-09-30 22:40:36.896028', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:36.959686', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.06806904077529907, 'timestamp': '2025-09-30 22:40:36.965763', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:37.025121', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.09823755919933319, 'timestamp': '2025-09-30 22:40:37.031933', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:37.089721', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.1699666976928711, 'timestamp': '2025-09-30 22:40:37.092626', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:37.152577', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.0761844739317894, 'timestamp': '2025-09-30 22:40:37.155996', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:37.215119', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.06932765990495682, 'timestamp': '2025-09-30 22:40:37.217840', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:37.275431', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.10957137495279312, 'timestamp': '2025-09-30 22:40:37.281773', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.341284', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.04518253356218338, 'timestamp': '2025-09-30 22:40:37.344028', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.402658', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.11043581366539001, 'timestamp': '2025-09-30 22:40:37.407169', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.465216', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.056558385491371155, 'timestamp': '2025-09-30 22:40:37.467923', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:37.526369', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.06183794140815735, 'timestamp': '2025-09-30 22:40:37.533201', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.590129', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.09853880852460861, 'timestamp': '2025-09-30 22:40:37.593559', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.651693', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.09122709184885025, 'timestamp': '2025-09-30 22:40:37.654732', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:37.712680', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.0808001309633255, 'timestamp': '2025-09-30 22:40:37.716750', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:37.774576', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.08446455001831055, 'timestamp': '2025-09-30 22:40:37.781799', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:37.838791', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.098533995449543, 'timestamp': '2025-09-30 22:40:37.843062', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:37.912901', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.07366854697465897, 'timestamp': '2025-09-30 22:40:37.916631', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:37.976242', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.1056797057390213, 'timestamp': '2025-09-30 22:40:37.979640', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.037593', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.13050556182861328, 'timestamp': '2025-09-30 22:40:38.044111', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:38.101772', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.1332835555076599, 'timestamp': '2025-09-30 22:40:38.106155', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:38.165408', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.08387665450572968, 'timestamp': '2025-09-30 22:40:38.170746', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:38.229924', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.18068234622478485, 'timestamp': '2025-09-30 22:40:38.234635', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.294129', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.14109636843204498, 'timestamp': '2025-09-30 22:40:38.303250', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.361318', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.08751143515110016, 'timestamp': '2025-09-30 22:40:38.364101', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.425216', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.14298489689826965, 'timestamp': '2025-09-30 22:40:38.429496', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:38.494227', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.019707323983311653, 'timestamp': '2025-09-30 22:40:38.499040', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:38.560970', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.04817705228924751, 'timestamp': '2025-09-30 22:40:38.569579', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.628589', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.0894525870680809, 'timestamp': '2025-09-30 22:40:38.633393', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:38.708510', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.0810578241944313, 'timestamp': '2025-09-30 22:40:38.714547', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:38.774275', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.06383265554904938, 'timestamp': '2025-09-30 22:40:38.779155', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:38.838104', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.10604151338338852, 'timestamp': '2025-09-30 22:40:38.845655', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:38.907680', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.126898393034935, 'timestamp': '2025-09-30 22:40:38.912313', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:38.975139', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.08085831254720688, 'timestamp': '2025-09-30 22:40:38.977932', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:39.035228', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.08340668678283691, 'timestamp': '2025-09-30 22:40:39.038559', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:39.096333', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.06437384337186813, 'timestamp': '2025-09-30 22:40:39.103368', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:39.160935', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.1486358493566513, 'timestamp': '2025-09-30 22:40:39.163892', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:39.222789', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.07938975095748901, 'timestamp': '2025-09-30 22:40:39.232750', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:39.297991', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.09305111318826675, 'timestamp': '2025-09-30 22:40:39.302028', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:39.360037', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.16186238825321198, 'timestamp': '2025-09-30 22:40:39.366998', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:39.429948', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.18614694476127625, 'timestamp': '2025-09-30 22:40:39.434334', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:39.497862', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.1093359962105751, 'timestamp': '2025-09-30 22:40:39.502109', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:39.563736', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.09129855781793594, 'timestamp': '2025-09-30 22:40:39.567272', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:39.625089', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.12275008112192154, 'timestamp': '2025-09-30 22:40:39.632696', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:39.689588', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.1013374775648117, 'timestamp': '2025-09-30 22:40:39.695270', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:39.753525', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.16488151252269745, 'timestamp': '2025-09-30 22:40:39.756559', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:39.814358', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.10178138315677643, 'timestamp': '2025-09-30 22:40:39.817361', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:39.874966', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.14978931844234467, 'timestamp': '2025-09-30 22:40:39.881683', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:39.946225', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.03827737644314766, 'timestamp': '2025-09-30 22:40:39.949713', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.008429', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.03843747824430466, 'timestamp': '2025-09-30 22:40:40.011343', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.068967', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.05852718651294708, 'timestamp': '2025-09-30 22:40:40.072429', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:40.134466', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.07030945271253586, 'timestamp': '2025-09-30 22:40:40.141617', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:40.197627', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.09170951694250107, 'timestamp': '2025-09-30 22:40:40.202868', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:40.263221', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.09639937430620193, 'timestamp': '2025-09-30 22:40:40.269191', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:40.328810', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.04325925186276436, 'timestamp': '2025-09-30 22:40:40.335454', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:40.397615', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.06338902562856674, 'timestamp': '2025-09-30 22:40:40.407290', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:40.466624', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.05354689434170723, 'timestamp': '2025-09-30 22:40:40.473107', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.535136', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.08990124613046646, 'timestamp': '2025-09-30 22:40:40.550829', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.608821', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.06804149597883224, 'timestamp': '2025-09-30 22:40:40.617580', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:40:40.688232', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.13691556453704834, 'timestamp': '2025-09-30 22:40:40.694907', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.762774', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.052303534001111984, 'timestamp': '2025-09-30 22:40:40.766568', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.825143', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.09263226389884949, 'timestamp': '2025-09-30 22:40:40.828602', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:40.887306', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.1525362879037857, 'timestamp': '2025-09-30 22:40:40.891221', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:40.949058', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.05713881179690361, 'timestamp': '2025-09-30 22:40:40.956544', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.014813', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.1244891956448555, 'timestamp': '2025-09-30 22:40:41.019480', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.077090', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.057637769728899, 'timestamp': '2025-09-30 22:40:41.092176', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.150085', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.1403774619102478, 'timestamp': '2025-09-30 22:40:41.153817', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.224185', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.036350417882204056, 'timestamp': '2025-09-30 22:40:41.231967', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:41.291666', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.08166598528623581, 'timestamp': '2025-09-30 22:40:41.295498', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.353633', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.14361347258090973, 'timestamp': '2025-09-30 22:40:41.356899', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.427974', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.05208362638950348, 'timestamp': '2025-09-30 22:40:41.435042', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.493348', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.09197001159191132, 'timestamp': '2025-09-30 22:40:41.501277', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.562850', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.08416645973920822, 'timestamp': '2025-09-30 22:40:41.565972', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.623520', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.05241818726062775, 'timestamp': '2025-09-30 22:40:41.627407', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.692150', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.08262157440185547, 'timestamp': '2025-09-30 22:40:41.695909', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:41.753964', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.1163991391658783, 'timestamp': '2025-09-30 22:40:41.761553', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.818890', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.0792495533823967, 'timestamp': '2025-09-30 22:40:41.823199', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:41.886127', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.07729964703321457, 'timestamp': '2025-09-30 22:40:41.890453', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:41.948314', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.09816790372133255, 'timestamp': '2025-09-30 22:40:41.959030', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.041640', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.08622229844331741, 'timestamp': '2025-09-30 22:40:42.049109', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:42.107485', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.05128154903650284, 'timestamp': '2025-09-30 22:40:42.110231', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.187085', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.06629642099142075, 'timestamp': '2025-09-30 22:40:42.199906', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:42.257935', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.1088070273399353, 'timestamp': '2025-09-30 22:40:42.261458', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:42.320071', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.12827759981155396, 'timestamp': '2025-09-30 22:40:42.340693', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.407649', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.12450282275676727, 'timestamp': '2025-09-30 22:40:42.411516', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.468345', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.04440449923276901, 'timestamp': '2025-09-30 22:40:42.478010', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:42.538683', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.06927259266376495, 'timestamp': '2025-09-30 22:40:42.542429', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:42.600838', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.09964434057474136, 'timestamp': '2025-09-30 22:40:42.608862', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:42.669158', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.12519903481006622, 'timestamp': '2025-09-30 22:40:42.671879', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:42.735190', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.08024948835372925, 'timestamp': '2025-09-30 22:40:42.738929', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.812865', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.04645366966724396, 'timestamp': '2025-09-30 22:40:42.820369', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:42.881793', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.07857508212327957, 'timestamp': '2025-09-30 22:40:42.894153', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:42.958768', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.11315934360027313, 'timestamp': '2025-09-30 22:40:42.961462', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:43.023565', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.07728931307792664, 'timestamp': '2025-09-30 22:40:43.026526', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.083945', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.08641607314348221, 'timestamp': '2025-09-30 22:40:43.087066', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:43.155354', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.10532978177070618, 'timestamp': '2025-09-30 22:40:43.162836', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.219268', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.0883711725473404, 'timestamp': '2025-09-30 22:40:43.223041', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.282273', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.1685294508934021, 'timestamp': '2025-09-30 22:40:43.285847', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.349986', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.0579875186085701, 'timestamp': '2025-09-30 22:40:43.372539', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:40:43.436019', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.151799276471138, 'timestamp': '2025-09-30 22:40:43.445736', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:43.512397', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.1462922841310501, 'timestamp': '2025-09-30 22:40:43.518519', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:43.577592', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.15712004899978638, 'timestamp': '2025-09-30 22:40:43.581147', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.643216', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.12035240978002548, 'timestamp': '2025-09-30 22:40:43.649423', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:43.714154', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.11142651736736298, 'timestamp': '2025-09-30 22:40:43.737456', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.807955', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.10285282880067825, 'timestamp': '2025-09-30 22:40:43.818536', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.876628', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.0872131735086441, 'timestamp': '2025-09-30 22:40:43.896877', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:43.973609', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.08183378726243973, 'timestamp': '2025-09-30 22:40:43.978344', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:44.035918', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.06768764555454254, 'timestamp': '2025-09-30 22:40:44.042626', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.099955', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.09292664378881454, 'timestamp': '2025-09-30 22:40:44.103734', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:44.166556', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.10823998600244522, 'timestamp': '2025-09-30 22:40:44.170736', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:44.233289', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.08563375473022461, 'timestamp': '2025-09-30 22:40:44.236526', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.308654', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.09752565622329712, 'timestamp': '2025-09-30 22:40:44.321496', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:44.379173', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.055940981954336166, 'timestamp': '2025-09-30 22:40:44.384271', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.451762', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.059476546943187714, 'timestamp': '2025-09-30 22:40:44.454994', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.520970', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.07223071157932281, 'timestamp': '2025-09-30 22:40:44.524268', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:44.581895', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.1861494779586792, 'timestamp': '2025-09-30 22:40:44.589582', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.650103', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.10477907210588455, 'timestamp': '2025-09-30 22:40:44.654433', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.722522', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.12160684168338776, 'timestamp': '2025-09-30 22:40:44.726116', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:44.783790', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.0609893761575222, 'timestamp': '2025-09-30 22:40:44.786499', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:44.844412', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.052558206021785736, 'timestamp': '2025-09-30 22:40:44.852177', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:44.910969', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.09182266145944595, 'timestamp': '2025-09-30 22:40:44.915185', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:44.976471', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.17707760632038116, 'timestamp': '2025-09-30 22:40:44.979875', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.038884', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.10176221281290054, 'timestamp': '2025-09-30 22:40:45.042856', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.102629', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.13133303821086884, 'timestamp': '2025-09-30 22:40:45.110515', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.167730', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.10560408979654312, 'timestamp': '2025-09-30 22:40:45.170743', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.229131', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.1263592541217804, 'timestamp': '2025-09-30 22:40:45.241144', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:45.302845', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.13326820731163025, 'timestamp': '2025-09-30 22:40:45.307485', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.366102', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.1266605406999588, 'timestamp': '2025-09-30 22:40:45.374079', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.434848', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.10600397735834122, 'timestamp': '2025-09-30 22:40:45.438691', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:45.506568', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.12705643475055695, 'timestamp': '2025-09-30 22:40:45.509984', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.583123', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.10594049841165543, 'timestamp': '2025-09-30 22:40:45.589349', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.653374', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.055979546159505844, 'timestamp': '2025-09-30 22:40:45.671104', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.736974', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.09366154670715332, 'timestamp': '2025-09-30 22:40:45.740420', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.803573', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.07812459766864777, 'timestamp': '2025-09-30 22:40:45.807579', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:45.865686', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.16559043526649475, 'timestamp': '2025-09-30 22:40:45.870127', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:45.935924', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.10605473816394806, 'timestamp': '2025-09-30 22:40:45.943664', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:46.009499', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.12809903919696808, 'timestamp': '2025-09-30 22:40:46.012637', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:46.085248', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.047256406396627426, 'timestamp': '2025-09-30 22:40:46.088659', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:46.148834', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.08300137519836426, 'timestamp': '2025-09-30 22:40:46.152566', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:46.228326', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.05761168524622917, 'timestamp': '2025-09-30 22:40:46.237034', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:40:46.296697', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.13945981860160828, 'timestamp': '2025-09-30 22:40:46.300831', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:46.368904', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.03699321672320366, 'timestamp': '2025-09-30 22:40:46.373469', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:46.432819', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.02470877580344677, 'timestamp': '2025-09-30 22:40:46.436843', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:46.497936', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.13015450537204742, 'timestamp': '2025-09-30 22:40:46.505771', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:46.577781', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.07929587364196777, 'timestamp': '2025-09-30 22:40:46.581191', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:46.640402', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.07579971849918365, 'timestamp': '2025-09-30 22:40:46.644135', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:46.704717', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.024339325726032257, 'timestamp': '2025-09-30 22:40:46.708876', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:46.769456', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.09241267293691635, 'timestamp': '2025-09-30 22:40:46.779689', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:46.848696', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.06851506233215332, 'timestamp': '2025-09-30 22:40:46.853275', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:46.913979', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.04869256541132927, 'timestamp': '2025-09-30 22:40:46.919136', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:46.981180', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.053327955305576324, 'timestamp': '2025-09-30 22:40:46.992432', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:47.066559', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.1325375735759735, 'timestamp': '2025-09-30 22:40:47.074459', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:47.137185', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.1405399590730667, 'timestamp': '2025-09-30 22:40:47.142108', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:47.201708', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.10647786408662796, 'timestamp': '2025-09-30 22:40:47.205315', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:47.264455', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.08465634286403656, 'timestamp': '2025-09-30 22:40:47.268456', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:47.342701', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.15146638453006744, 'timestamp': '2025-09-30 22:40:47.351021', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:47.411615', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.13944454491138458, 'timestamp': '2025-09-30 22:40:47.416269', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:47.474648', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.13324524462223053, 'timestamp': '2025-09-30 22:40:47.491138', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:47.552970', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.10682649910449982, 'timestamp': '2025-09-30 22:40:47.557773', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:47.631976', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.07970484346151352, 'timestamp': '2025-09-30 22:40:47.638507', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:47.696817', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.07780366390943527, 'timestamp': '2025-09-30 22:40:47.700762', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:47.758902', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.1116383746266365, 'timestamp': '2025-09-30 22:40:47.762974', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:47.821139', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.06588238477706909, 'timestamp': '2025-09-30 22:40:47.825025', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:47.883889', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.06464028358459473, 'timestamp': '2025-09-30 22:40:47.891234', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:47.968639', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.0510796383023262, 'timestamp': '2025-09-30 22:40:47.971990', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.030975', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.10423018038272858, 'timestamp': '2025-09-30 22:40:48.034087', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.095343', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.08240726590156555, 'timestamp': '2025-09-30 22:40:48.099340', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.156457', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.04932497441768646, 'timestamp': '2025-09-30 22:40:48.165250', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:48.224881', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.02802850678563118, 'timestamp': '2025-09-30 22:40:48.230338', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.299195', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.13383200764656067, 'timestamp': '2025-09-30 22:40:48.306218', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:48.365518', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.1046045795083046, 'timestamp': '2025-09-30 22:40:48.370424', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.429130', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.03565352410078049, 'timestamp': '2025-09-30 22:40:48.436836', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:48.511071', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.12608538568019867, 'timestamp': '2025-09-30 22:40:48.515966', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:48.575457', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.21584559977054596, 'timestamp': '2025-09-30 22:40:48.580143', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:48.642907', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.03698960319161415, 'timestamp': '2025-09-30 22:40:48.647300', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.709077', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.016107458621263504, 'timestamp': '2025-09-30 22:40:48.716830', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.775419', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.06082197651267052, 'timestamp': '2025-09-30 22:40:48.779146', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:48.848671', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.048010390251874924, 'timestamp': '2025-09-30 22:40:48.853128', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.918542', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.020461613312363625, 'timestamp': '2025-09-30 22:40:48.921856', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:48.980952', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.0910358875989914, 'timestamp': '2025-09-30 22:40:48.988707', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:49.046623', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.09012457728385925, 'timestamp': '2025-09-30 22:40:49.050961', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:49.119088', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.12048963457345963, 'timestamp': '2025-09-30 22:40:49.124766', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.183820', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.033945031464099884, 'timestamp': '2025-09-30 22:40:49.187414', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:49.245672', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.10127251595258713, 'timestamp': '2025-09-30 22:40:49.262581', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.328108', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.12279107421636581, 'timestamp': '2025-09-30 22:40:49.332600', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:49.389609', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.10312498360872269, 'timestamp': '2025-09-30 22:40:49.394310', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.456565', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.11121685802936554, 'timestamp': '2025-09-30 22:40:49.461079', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:49.521159', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.07458125799894333, 'timestamp': '2025-09-30 22:40:49.528835', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:49.585784', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.10350720584392548, 'timestamp': '2025-09-30 22:40:49.590010', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.649544', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.016061313450336456, 'timestamp': '2025-09-30 22:40:49.653042', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:49.710429', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.08521123230457306, 'timestamp': '2025-09-30 22:40:49.713996', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:49.771596', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.11217159777879715, 'timestamp': '2025-09-30 22:40:49.785689', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.843262', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.06116923317313194, 'timestamp': '2025-09-30 22:40:49.846543', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.912142', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.1295429766178131, 'timestamp': '2025-09-30 22:40:49.915574', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:49.986886', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.11879787594079971, 'timestamp': '2025-09-30 22:40:49.991227', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:50.049086', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.09623092412948608, 'timestamp': '2025-09-30 22:40:50.056814', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:50.113465', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.11712238937616348, 'timestamp': '2025-09-30 22:40:50.118169', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:50.175911', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.10232795774936676, 'timestamp': '2025-09-30 22:40:50.196436', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:50.268380', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.08005646616220474, 'timestamp': '2025-09-30 22:40:50.278413', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:50.354901', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.16321654617786407, 'timestamp': '2025-09-30 22:40:50.361987', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:50.431398', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.0952536091208458, 'timestamp': '2025-09-30 22:40:50.436794', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:50.496301', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.11992297321557999, 'timestamp': '2025-09-30 22:40:50.499416', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:50.569322', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.03970593586564064, 'timestamp': '2025-09-30 22:40:50.581217', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:50.641189', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.11767110973596573, 'timestamp': '2025-09-30 22:40:50.650073', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:50.709576', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.09024392068386078, 'timestamp': '2025-09-30 22:40:50.720354', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:50.777876', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.0639844760298729, 'timestamp': '2025-09-30 22:40:50.782773', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:50.843404', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.11619223654270172, 'timestamp': '2025-09-30 22:40:50.847977', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:50.905379', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.11975671350955963, 'timestamp': '2025-09-30 22:40:50.917432', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:50.974610', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.17085348069667816, 'timestamp': '2025-09-30 22:40:50.977779', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:51.046931', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.0586133748292923, 'timestamp': '2025-09-30 22:40:51.052485', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:51.130217', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.18648186326026917, 'timestamp': '2025-09-30 22:40:51.134341', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:40:51.193907', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.06514248251914978, 'timestamp': '2025-09-30 22:40:51.202833', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:51.260135', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.09619968384504318, 'timestamp': '2025-09-30 22:40:51.263471', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:51.335733', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.052801210433244705, 'timestamp': '2025-09-30 22:40:51.338766', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:51.400707', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.12716661393642426, 'timestamp': '2025-09-30 22:40:51.405249', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:51.469383', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.09948327392339706, 'timestamp': '2025-09-30 22:40:51.476544', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:51.538116', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.11973026394844055, 'timestamp': '2025-09-30 22:40:51.540862', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:51.597825', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.11859174817800522, 'timestamp': '2025-09-30 22:40:51.600811', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:51.670998', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.05551717057824135, 'timestamp': '2025-09-30 22:40:51.675358', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:51.738729', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.14192895591259003, 'timestamp': '2025-09-30 22:40:51.745152', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:51.815628', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.047791142016649246, 'timestamp': '2025-09-30 22:40:51.825298', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:51.905258', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.1434941589832306, 'timestamp': '2025-09-30 22:40:51.908145', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:51.967856', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.07869527488946915, 'timestamp': '2025-09-30 22:40:51.970945', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.028293', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.14524979889392853, 'timestamp': '2025-09-30 22:40:52.034791', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:52.094112', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.013532079756259918, 'timestamp': '2025-09-30 22:40:52.099966', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.181800', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.046883292496204376, 'timestamp': '2025-09-30 22:40:52.185260', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.244729', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.12043903023004532, 'timestamp': '2025-09-30 22:40:52.256829', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.326419', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.1136796623468399, 'timestamp': '2025-09-30 22:40:52.338604', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.406069', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.06731253117322922, 'timestamp': '2025-09-30 22:40:52.410088', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.468781', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.05138949304819107, 'timestamp': '2025-09-30 22:40:52.472761', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:52.531534', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.056763436645269394, 'timestamp': '2025-09-30 22:40:52.540946', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.617572', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.07168468087911606, 'timestamp': '2025-09-30 22:40:52.625261', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.681717', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.17550373077392578, 'timestamp': '2025-09-30 22:40:52.684922', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.742753', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.1210750937461853, 'timestamp': '2025-09-30 22:40:52.746734', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.804598', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.11784941703081131, 'timestamp': '2025-09-30 22:40:52.808498', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:52.883682', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.10149137675762177, 'timestamp': '2025-09-30 22:40:52.897915', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:52.960385', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.1443026214838028, 'timestamp': '2025-09-30 22:40:52.963530', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:53.021528', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.08714626729488373, 'timestamp': '2025-09-30 22:40:53.024568', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:53.094913', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.07733871042728424, 'timestamp': '2025-09-30 22:40:53.098497', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:53.168504', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.053036127239465714, 'timestamp': '2025-09-30 22:40:53.175650', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:53.244071', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.09660923480987549, 'timestamp': '2025-09-30 22:40:53.247795', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:53.305266', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.07498398423194885, 'timestamp': '2025-09-30 22:40:53.308510', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:53.374248', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.07968975603580475, 'timestamp': '2025-09-30 22:40:53.377413', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:53.434959', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.21734897792339325, 'timestamp': '2025-09-30 22:40:53.441579', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-09-30 22:40:53.889327', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:53.949349', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.075593501329422, 'timestamp': '2025-09-30 22:40:53.952363', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.010808', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.10602456331253052, 'timestamp': '2025-09-30 22:40:54.013914', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:54.072075', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.12621092796325684, 'timestamp': '2025-09-30 22:40:54.075034', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.134617', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.06413427740335464, 'timestamp': '2025-09-30 22:40:54.141221', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:54.197979', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.027439771220088005, 'timestamp': '2025-09-30 22:40:54.201565', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:54.269697', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.07925079762935638, 'timestamp': '2025-09-30 22:40:54.277867', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.334697', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.045514822006225586, 'timestamp': '2025-09-30 22:40:54.337950', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:54.395090', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.13627414405345917, 'timestamp': '2025-09-30 22:40:54.402769', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:54.460077', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.12699103355407715, 'timestamp': '2025-09-30 22:40:54.462954', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:54.520236', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.05009149760007858, 'timestamp': '2025-09-30 22:40:54.523035', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.580734', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.1471666395664215, 'timestamp': '2025-09-30 22:40:54.584697', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:54.655210', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.1572253257036209, 'timestamp': '2025-09-30 22:40:54.663161', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:54.720204', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.12800540030002594, 'timestamp': '2025-09-30 22:40:54.724571', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:54.786497', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.06804722547531128, 'timestamp': '2025-09-30 22:40:54.795201', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.854087', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.0793018713593483, 'timestamp': '2025-09-30 22:40:54.857374', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.915071', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.09399472922086716, 'timestamp': '2025-09-30 22:40:54.922324', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:54.979483', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.06698952615261078, 'timestamp': '2025-09-30 22:40:54.982340', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.040228', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.07952331751585007, 'timestamp': '2025-09-30 22:40:55.043910', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.101990', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.058368150144815445, 'timestamp': '2025-09-30 22:40:55.110529', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:55.183088', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.08572415262460709, 'timestamp': '2025-09-30 22:40:55.194372', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:55.250813', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.12144871056079865, 'timestamp': '2025-09-30 22:40:55.253593', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.311871', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.05868709459900856, 'timestamp': '2025-09-30 22:40:55.316165', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:55.375566', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.15652070939540863, 'timestamp': '2025-09-30 22:40:55.378272', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.444709', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.06300809979438782, 'timestamp': '2025-09-30 22:40:55.451638', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:55.507603', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.09632442891597748, 'timestamp': '2025-09-30 22:40:55.511248', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:55.568533', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.06106271222233772, 'timestamp': '2025-09-30 22:40:55.572779', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:55.632003', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.09124728292226791, 'timestamp': '2025-09-30 22:40:55.635299', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:55.693510', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.09274537116289139, 'timestamp': '2025-09-30 22:40:55.701748', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.761734', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.04086807370185852, 'timestamp': '2025-09-30 22:40:55.765020', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:55.823221', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.057610224932432175, 'timestamp': '2025-09-30 22:40:55.826984', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:55.885644', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.08002494275569916, 'timestamp': '2025-09-30 22:40:55.889570', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:55.946686', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.111221082508564, 'timestamp': '2025-09-30 22:40:55.953110', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:56.009862', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.08511755615472794, 'timestamp': '2025-09-30 22:40:56.013215', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:56.081938', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.09415794909000397, 'timestamp': '2025-09-30 22:40:56.084920', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:56.143682', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.07524055242538452, 'timestamp': '2025-09-30 22:40:56.151883', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:56.211577', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.14772929251194, 'timestamp': '2025-09-30 22:40:56.217968', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:56.283274', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.1269667148590088, 'timestamp': '2025-09-30 22:40:56.288258', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:56.346473', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.03122621215879917, 'timestamp': '2025-09-30 22:40:56.350995', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:56.423334', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.10586719214916229, 'timestamp': '2025-09-30 22:40:56.427279', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:56.494718', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.008447731845080853, 'timestamp': '2025-09-30 22:40:56.502086', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:56.564960', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.06558368355035782, 'timestamp': '2025-09-30 22:40:56.569446', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:56.627803', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.09427279233932495, 'timestamp': '2025-09-30 22:40:56.634417', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:56.693955', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.07126038521528244, 'timestamp': '2025-09-30 22:40:56.704540', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:56.765049', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.07666373997926712, 'timestamp': '2025-09-30 22:40:56.771497', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:56.829243', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.08523447066545486, 'timestamp': '2025-09-30 22:40:56.834078', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:56.893593', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.10526806861162186, 'timestamp': '2025-09-30 22:40:56.896485', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:56.954721', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.04492257162928581, 'timestamp': '2025-09-30 22:40:56.962743', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:40:57.037062', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.04679350182414055, 'timestamp': '2025-09-30 22:40:57.044485', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:57.101159', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.07491657882928848, 'timestamp': '2025-09-30 22:40:57.104934', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.176631', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.08961820602416992, 'timestamp': '2025-09-30 22:40:57.180199', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.248301', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.10657447576522827, 'timestamp': '2025-09-30 22:40:57.252939', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:57.311731', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.11639854311943054, 'timestamp': '2025-09-30 22:40:57.318318', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.398503', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.13988643884658813, 'timestamp': '2025-09-30 22:40:57.401906', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:57.459338', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.05862044170498848, 'timestamp': '2025-09-30 22:40:57.463412', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.520507', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.10657484084367752, 'timestamp': '2025-09-30 22:40:57.524079', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.582461', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.12843193113803864, 'timestamp': '2025-09-30 22:40:57.588843', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:57.654500', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.0835971087217331, 'timestamp': '2025-09-30 22:40:57.657577', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:57.715673', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.06784581393003464, 'timestamp': '2025-09-30 22:40:57.720814', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:57.788366', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.06357641518115997, 'timestamp': '2025-09-30 22:40:57.794935', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:57.852354', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.15749123692512512, 'timestamp': '2025-09-30 22:40:57.859019', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:40:57.915287', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.14709877967834473, 'timestamp': '2025-09-30 22:40:57.918747', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:57.976740', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.03838232904672623, 'timestamp': '2025-09-30 22:40:57.981465', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:58.040174', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.15718267858028412, 'timestamp': '2025-09-30 22:40:58.057359', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.126642', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.06425779312849045, 'timestamp': '2025-09-30 22:40:58.133023', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:58.191056', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.1281195431947708, 'timestamp': '2025-09-30 22:40:58.194418', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.252178', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.12108974158763885, 'timestamp': '2025-09-30 22:40:58.267390', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.327857', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.09724143892526627, 'timestamp': '2025-09-30 22:40:58.331358', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.388549', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.12608249485492706, 'timestamp': '2025-09-30 22:40:58.396151', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.463524', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.1082049012184143, 'timestamp': '2025-09-30 22:40:58.467206', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.525156', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.10508562624454498, 'timestamp': '2025-09-30 22:40:58.528919', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.590867', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.1066078171133995, 'timestamp': '2025-09-30 22:40:58.596458', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.655398', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.06391064077615738, 'timestamp': '2025-09-30 22:40:58.662086', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.718815', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.06515946239233017, 'timestamp': '2025-09-30 22:40:58.736018', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.794243', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.09966781735420227, 'timestamp': '2025-09-30 22:40:58.798720', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:58.857157', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.16156519949436188, 'timestamp': '2025-09-30 22:40:58.860639', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:58.918208', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.1029791459441185, 'timestamp': '2025-09-30 22:40:58.924709', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:58.981590', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.10097511857748032, 'timestamp': '2025-09-30 22:40:58.990641', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.049391', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.09477934241294861, 'timestamp': '2025-09-30 22:40:59.052583', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:59.122306', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.05483325570821762, 'timestamp': '2025-09-30 22:40:59.126915', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.204756', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.10675176978111267, 'timestamp': '2025-09-30 22:40:59.214723', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:40:59.284674', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.1337735503911972, 'timestamp': '2025-09-30 22:40:59.293746', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.367611', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.06914730370044708, 'timestamp': '2025-09-30 22:40:59.373798', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:59.434095', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.11115775257349014, 'timestamp': '2025-09-30 22:40:59.437266', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:59.509211', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.11367292702198029, 'timestamp': '2025-09-30 22:40:59.519943', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:59.575962', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.11736605316400528, 'timestamp': '2025-09-30 22:40:59.578910', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:40:59.638807', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.12597566843032837, 'timestamp': '2025-09-30 22:40:59.642090', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.700745', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.1838873326778412, 'timestamp': '2025-09-30 22:40:59.703839', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:40:59.762338', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.14825432002544403, 'timestamp': '2025-09-30 22:40:59.768550', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:40:59.825227', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.0529785081744194, 'timestamp': '2025-09-30 22:40:59.828013', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.884773', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.08287299424409866, 'timestamp': '2025-09-30 22:40:59.887745', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:40:59.958149', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.06996843963861465, 'timestamp': '2025-09-30 22:40:59.962780', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:00.020194', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.1426195502281189, 'timestamp': '2025-09-30 22:41:00.028482', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:00.099617', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.10603142529726028, 'timestamp': '2025-09-30 22:41:00.104763', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.164779', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.14927099645137787, 'timestamp': '2025-09-30 22:41:00.168105', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:00.237558', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.11809524148702621, 'timestamp': '2025-09-30 22:41:00.240371', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:00.300808', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.09442059695720673, 'timestamp': '2025-09-30 22:41:00.309905', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:00.385797', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.11873526871204376, 'timestamp': '2025-09-30 22:41:00.391806', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.457850', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.12601584196090698, 'timestamp': '2025-09-30 22:41:00.464323', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.528625', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.10992905497550964, 'timestamp': '2025-09-30 22:41:00.531512', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.607439', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.05871934816241264, 'timestamp': '2025-09-30 22:41:00.613826', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:00.671819', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.08495647460222244, 'timestamp': '2025-09-30 22:41:00.677636', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.747443', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.018303973600268364, 'timestamp': '2025-09-30 22:41:00.754140', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:00.815989', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.06733758002519608, 'timestamp': '2025-09-30 22:41:00.824404', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:00.896201', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.0804520770907402, 'timestamp': '2025-09-30 22:41:00.910664', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:00.973413', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.1796506941318512, 'timestamp': '2025-09-30 22:41:00.977063', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.043787', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.06098296865820885, 'timestamp': '2025-09-30 22:41:01.046266', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:01.104782', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.0839545950293541, 'timestamp': '2025-09-30 22:41:01.107729', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:01.166053', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.13510717451572418, 'timestamp': '2025-09-30 22:41:01.176169', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.233382', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.0894087702035904, 'timestamp': '2025-09-30 22:41:01.236250', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.293678', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.04280304163694382, 'timestamp': '2025-09-30 22:41:01.296414', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.369129', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.07518164813518524, 'timestamp': '2025-09-30 22:41:01.372494', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.432774', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.03403903543949127, 'timestamp': '2025-09-30 22:41:01.441934', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:01.509632', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.038950785994529724, 'timestamp': '2025-09-30 22:41:01.513376', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:01.593727', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.09629244357347488, 'timestamp': '2025-09-30 22:41:01.598819', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:01.656151', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.04496350884437561, 'timestamp': '2025-09-30 22:41:01.659233', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:01.717855', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.06239531934261322, 'timestamp': '2025-09-30 22:41:01.726740', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:01.793709', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.04973188415169716, 'timestamp': '2025-09-30 22:41:01.796403', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:01.857496', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.04408680647611618, 'timestamp': '2025-09-30 22:41:01.865475', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:01.925723', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.10211124271154404, 'timestamp': '2025-09-30 22:41:01.928794', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:01.998502', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.06823446601629257, 'timestamp': '2025-09-30 22:41:02.006069', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:02.068107', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.12295012176036835, 'timestamp': '2025-09-30 22:41:02.070821', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:02.129088', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.12130682170391083, 'timestamp': '2025-09-30 22:41:02.137981', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:02.223021', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.1689756214618683, 'timestamp': '2025-09-30 22:41:02.241020', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:02.307581', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.14993953704833984, 'timestamp': '2025-09-30 22:41:02.318471', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:02.388963', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.045251719653606415, 'timestamp': '2025-09-30 22:41:02.399320', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:02.465076', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.10750899463891983, 'timestamp': '2025-09-30 22:41:02.475532', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:02.555274', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.12134187668561935, 'timestamp': '2025-09-30 22:41:02.558337', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:02.639536', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.061541635543107986, 'timestamp': '2025-09-30 22:41:02.647662', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:02.712119', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.05146047845482826, 'timestamp': '2025-09-30 22:41:02.720451', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:02.781781', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.1404673010110855, 'timestamp': '2025-09-30 22:41:02.784624', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:02.862537', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.10018281638622284, 'timestamp': '2025-09-30 22:41:02.866261', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:02.925333', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.05650044232606888, 'timestamp': '2025-09-30 22:41:02.931625', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.001543', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.11871176958084106, 'timestamp': '2025-09-30 22:41:03.006402', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.063836', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.07718514651060104, 'timestamp': '2025-09-30 22:41:03.072989', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:03.148927', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.10895191878080368, 'timestamp': '2025-09-30 22:41:03.160461', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.238654', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.08570663630962372, 'timestamp': '2025-09-30 22:41:03.245483', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.305422', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.16966095566749573, 'timestamp': '2025-09-30 22:41:03.309361', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:03.387369', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.08931481838226318, 'timestamp': '2025-09-30 22:41:03.391223', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:03.453175', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.09769724309444427, 'timestamp': '2025-09-30 22:41:03.456558', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:03.515498', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.07634259760379791, 'timestamp': '2025-09-30 22:41:03.522662', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:03.598143', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.11568998545408249, 'timestamp': '2025-09-30 22:41:03.601889', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.666642', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.09372425824403763, 'timestamp': '2025-09-30 22:41:03.677024', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:03.739129', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.15418310463428497, 'timestamp': '2025-09-30 22:41:03.743546', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:41:03.803438', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.13024480640888214, 'timestamp': '2025-09-30 22:41:03.811582', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:03.873074', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.04082384705543518, 'timestamp': '2025-09-30 22:41:03.879266', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:03.959573', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.08244366943836212, 'timestamp': '2025-09-30 22:41:03.964478', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:04.024979', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.13379333913326263, 'timestamp': '2025-09-30 22:41:04.041731', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:04.102657', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.20096679031848907, 'timestamp': '2025-09-30 22:41:04.110766', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:04.194905', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.06886719912290573, 'timestamp': '2025-09-30 22:41:04.201081', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:04.259526', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.11765007674694061, 'timestamp': '2025-09-30 22:41:04.274973', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:04.335635', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.09334749728441238, 'timestamp': '2025-09-30 22:41:04.340744', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:04.398523', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.06719385087490082, 'timestamp': '2025-09-30 22:41:04.415592', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:04.482057', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.037485621869564056, 'timestamp': '2025-09-30 22:41:04.486361', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:04.548504', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.0705290287733078, 'timestamp': '2025-09-30 22:41:04.554035', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:04.624472', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.244540736079216, 'timestamp': '2025-09-30 22:41:04.637080', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:04.714144', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.08499791473150253, 'timestamp': '2025-09-30 22:41:04.729842', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:04.809980', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.09966584295034409, 'timestamp': '2025-09-30 22:41:04.813499', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:04.888768', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.13136525452136993, 'timestamp': '2025-09-30 22:41:04.892286', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:04.964170', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.10341494530439377, 'timestamp': '2025-09-30 22:41:04.967536', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:05.027689', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.09725058078765869, 'timestamp': '2025-09-30 22:41:05.037196', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.095234', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.07927780598402023, 'timestamp': '2025-09-30 22:41:05.099034', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:05.171882', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.19115135073661804, 'timestamp': '2025-09-30 22:41:05.187015', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:05.268512', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.06128189340233803, 'timestamp': '2025-09-30 22:41:05.275229', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.350171', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.14840392768383026, 'timestamp': '2025-09-30 22:41:05.370724', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:05.437883', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.08468741923570633, 'timestamp': '2025-09-30 22:41:05.441780', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.500788', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.1549394726753235, 'timestamp': '2025-09-30 22:41:05.505968', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.564913', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.036355022341012955, 'timestamp': '2025-09-30 22:41:05.568387', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:05.627689', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.17651104927062988, 'timestamp': '2025-09-30 22:41:05.635512', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.696604', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.12264610826969147, 'timestamp': '2025-09-30 22:41:05.707599', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:05.768632', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.05984357371926308, 'timestamp': '2025-09-30 22:41:05.772402', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:05.848001', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.044503964483737946, 'timestamp': '2025-09-30 22:41:05.851898', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:05.927838', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.06344148516654968, 'timestamp': '2025-09-30 22:41:05.935726', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:05.994967', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.06352442502975464, 'timestamp': '2025-09-30 22:41:06.009228', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:06.069239', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.08767205476760864, 'timestamp': '2025-09-30 22:41:06.073196', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:41:20.392621', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 9047.985030070504, 'timestamp': '2025-09-30 22:41:20.421366', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:20.481972', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.08547777682542801, 'timestamp': '2025-09-30 22:41:20.486274', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:20.545202', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.062304604798555374, 'timestamp': '2025-09-30 22:41:20.555126', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:20.612850', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.06737349182367325, 'timestamp': '2025-09-30 22:41:20.616033', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:20.673321', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.10927077382802963, 'timestamp': '2025-09-30 22:41:20.678706', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:20.738173', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.11832666397094727, 'timestamp': '2025-09-30 22:41:20.742179', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:20.802809', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.09209517389535904, 'timestamp': '2025-09-30 22:41:20.810069', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:20.868680', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.11299164593219757, 'timestamp': '2025-09-30 22:41:20.872788', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:20.951789', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.033525776118040085, 'timestamp': '2025-09-30 22:41:20.955169', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:21.015001', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.0880005732178688, 'timestamp': '2025-09-30 22:41:21.019250', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:21.083073', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.09446630626916885, 'timestamp': '2025-09-30 22:41:21.090413', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:21.149062', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.08473782241344452, 'timestamp': '2025-09-30 22:41:21.152757', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:21.210428', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.17149972915649414, 'timestamp': '2025-09-30 22:41:21.213857', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:21.290848', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.15766948461532593, 'timestamp': '2025-09-30 22:41:21.298848', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:21.357182', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.10649629682302475, 'timestamp': '2025-09-30 22:41:21.363715', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:21.420946', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.11600372940301895, 'timestamp': '2025-09-30 22:41:21.426749', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:21.483431', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.07673055678606033, 'timestamp': '2025-09-30 22:41:21.487027', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:21.545848', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.08559169620275497, 'timestamp': '2025-09-30 22:41:21.549085', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:21.613194', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.0151963597163558, 'timestamp': '2025-09-30 22:41:21.620625', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:21.683540', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.11085564643144608, 'timestamp': '2025-09-30 22:41:21.686645', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:21.748422', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.06824811547994614, 'timestamp': '2025-09-30 22:41:21.751473', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:21.813667', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.04295854642987251, 'timestamp': '2025-09-30 22:41:21.828737', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:21.890450', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.11646615713834763, 'timestamp': '2025-09-30 22:41:21.897666', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:21.953860', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.09610416740179062, 'timestamp': '2025-09-30 22:41:21.961742', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.023110', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.07135668396949768, 'timestamp': '2025-09-30 22:41:22.026139', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:22.084682', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.17639769613742828, 'timestamp': '2025-09-30 22:41:22.087641', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:22.147988', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.12619605660438538, 'timestamp': '2025-09-30 22:41:22.156201', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:22.214063', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.056342627853155136, 'timestamp': '2025-09-30 22:41:22.217958', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:22.276418', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.043120793998241425, 'timestamp': '2025-09-30 22:41:22.278871', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.337663', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.15143847465515137, 'timestamp': '2025-09-30 22:41:22.341244', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:22.410126', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.08337979763746262, 'timestamp': '2025-09-30 22:41:22.417102', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:22.478632', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.18343022465705872, 'timestamp': '2025-09-30 22:41:22.489801', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.548986', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.0712418258190155, 'timestamp': '2025-09-30 22:41:22.554326', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:22.623625', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.10309208184480667, 'timestamp': '2025-09-30 22:41:22.627148', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.687761', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.15307681262493134, 'timestamp': '2025-09-30 22:41:22.697093', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:22.755511', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.07439018785953522, 'timestamp': '2025-09-30 22:41:22.758238', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.817276', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.20025917887687683, 'timestamp': '2025-09-30 22:41:22.826748', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:22.894540', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.10682930797338486, 'timestamp': '2025-09-30 22:41:22.898577', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:22.957851', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.010485030710697174, 'timestamp': '2025-09-30 22:41:22.965541', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:23.027168', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.15535861253738403, 'timestamp': '2025-09-30 22:41:23.034670', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:23.108282', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.11812159419059753, 'timestamp': '2025-09-30 22:41:23.112290', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:23.182967', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.06899340450763702, 'timestamp': '2025-09-30 22:41:23.185557', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:23.252040', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.0665334165096283, 'timestamp': '2025-09-30 22:41:23.260947', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:23.322300', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.0763288140296936, 'timestamp': '2025-09-30 22:41:23.326632', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:23.386315', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.07918564975261688, 'timestamp': '2025-09-30 22:41:23.391464', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:23.465762', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.03652802109718323, 'timestamp': '2025-09-30 22:41:23.471636', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:23.529861', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.10430271923542023, 'timestamp': '2025-09-30 22:41:23.550337', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:23.610399', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.06964536756277084, 'timestamp': '2025-09-30 22:41:23.615611', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:23.685587', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.13402104377746582, 'timestamp': '2025-09-30 22:41:23.691215', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:23.749300', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.08171647787094116, 'timestamp': '2025-09-30 22:41:23.758500', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:23.820690', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.06653854995965958, 'timestamp': '2025-09-30 22:41:23.828908', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:23.887247', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.16308756172657013, 'timestamp': '2025-09-30 22:41:23.891861', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:23.958146', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.054173197597265244, 'timestamp': '2025-09-30 22:41:23.966211', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:24.029797', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.08435608446598053, 'timestamp': '2025-09-30 22:41:24.036634', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:24.094331', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.10733714699745178, 'timestamp': '2025-09-30 22:41:24.102518', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:24.161133', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.0956011563539505, 'timestamp': '2025-09-30 22:41:24.165440', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:24.224256', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.055967845022678375, 'timestamp': '2025-09-30 22:41:24.244183', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:24.307472', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.12090560048818588, 'timestamp': '2025-09-30 22:41:24.312055', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:24.385329', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.07770242542028427, 'timestamp': '2025-09-30 22:41:24.408556', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:24.480698', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.06818786263465881, 'timestamp': '2025-09-30 22:41:24.483863', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:24.542443', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.09331398457288742, 'timestamp': '2025-09-30 22:41:24.545425', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:24.603237', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.10266482830047607, 'timestamp': '2025-09-30 22:41:24.608088', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:24.669681', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.074334055185318, 'timestamp': '2025-09-30 22:41:24.689465', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:24.766391', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.04720774665474892, 'timestamp': '2025-09-30 22:41:24.785434', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:24.846675', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.1406199038028717, 'timestamp': '2025-09-30 22:41:24.851111', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:24.927195', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.1395079493522644, 'timestamp': '2025-09-30 22:41:24.932225', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:24.992012', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.04336216300725937, 'timestamp': '2025-09-30 22:41:25.017662', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:25.078101', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.14135321974754333, 'timestamp': '2025-09-30 22:41:25.081773', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:25.155160', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.03726411238312721, 'timestamp': '2025-09-30 22:41:25.176921', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:25.253257', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.02652727998793125, 'timestamp': '2025-09-30 22:41:25.257188', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:25.315398', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.04414370656013489, 'timestamp': '2025-09-30 22:41:25.326863', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:25.388387', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.15521235764026642, 'timestamp': '2025-09-30 22:41:25.391237', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:25.452899', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.08489906787872314, 'timestamp': '2025-09-30 22:41:25.456387', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:25.515657', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.12474074959754944, 'timestamp': '2025-09-30 22:41:25.519416', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:41:25.579123', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.10743686556816101, 'timestamp': '2025-09-30 22:41:25.604814', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:25.662850', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.1426413357257843, 'timestamp': '2025-09-30 22:41:25.667686', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:25.728030', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.1002868190407753, 'timestamp': '2025-09-30 22:41:25.733356', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:25.810503', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.0715147852897644, 'timestamp': '2025-09-30 22:41:25.831659', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:25.890065', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.0837678611278534, 'timestamp': '2025-09-30 22:41:25.898404', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:25.962135', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.05299772694706917, 'timestamp': '2025-09-30 22:41:25.980696', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:26.043993', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.09390836954116821, 'timestamp': '2025-09-30 22:41:26.058319', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:26.120001', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.03276025131344795, 'timestamp': '2025-09-30 22:41:26.124026', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:26.191528', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.14805921912193298, 'timestamp': '2025-09-30 22:41:26.198737', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:26.257780', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.04606296867132187, 'timestamp': '2025-09-30 22:41:26.260841', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:26.322824', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.06724367290735245, 'timestamp': '2025-09-30 22:41:26.326314', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:26.384096', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.15062390267848969, 'timestamp': '2025-09-30 22:41:26.398256', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 8640052517568.0}, 'timestamp': '2025-09-30 22:41:26.481188', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.06947819143533707, 'timestamp': '2025-09-30 22:41:26.494098', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:26.552132', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.12829989194869995, 'timestamp': '2025-09-30 22:41:26.556015', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:26.613554', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.1694795787334442, 'timestamp': '2025-09-30 22:41:26.625280', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:26.694628', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.09247670322656631, 'timestamp': '2025-09-30 22:41:26.697892', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:26.756518', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.1304921954870224, 'timestamp': '2025-09-30 22:41:26.775207', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:26.834867', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.09375453740358353, 'timestamp': '2025-09-30 22:41:26.839294', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:26.896662', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.11382877081632614, 'timestamp': '2025-09-30 22:41:26.914062', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:26.984798', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.09164842218160629, 'timestamp': '2025-09-30 22:41:26.988936', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.060329', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.05916363373398781, 'timestamp': '2025-09-30 22:41:27.070663', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.140780', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.1318589299917221, 'timestamp': '2025-09-30 22:41:27.143900', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:27.202179', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.05870939418673515, 'timestamp': '2025-09-30 22:41:27.206216', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.268598', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.08416805416345596, 'timestamp': '2025-09-30 22:41:27.273652', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.347061', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.06672469526529312, 'timestamp': '2025-09-30 22:41:27.353976', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:27.411670', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.06162499263882637, 'timestamp': '2025-09-30 22:41:27.420455', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:27.477710', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.08952523022890091, 'timestamp': '2025-09-30 22:41:27.494180', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:27.555427', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.14677992463111877, 'timestamp': '2025-09-30 22:41:27.560008', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:27.630840', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.1288144290447235, 'timestamp': '2025-09-30 22:41:27.642619', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.708822', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.1029534637928009, 'timestamp': '2025-09-30 22:41:27.712918', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:27.782690', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.07011161744594574, 'timestamp': '2025-09-30 22:41:27.787882', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:27.847108', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.05004001408815384, 'timestamp': '2025-09-30 22:41:27.851276', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:27.910381', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.1416783183813095, 'timestamp': '2025-09-30 22:41:27.917156', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:27.975789', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.12221872806549072, 'timestamp': '2025-09-30 22:41:27.991375', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:28.061914', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.11502794921398163, 'timestamp': '2025-09-30 22:41:28.066749', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:28.126212', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.013218461535871029, 'timestamp': '2025-09-30 22:41:28.131121', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:28.193572', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.05886463075876236, 'timestamp': '2025-09-30 22:41:28.201299', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:28.259665', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.1635919064283371, 'timestamp': '2025-09-30 22:41:28.264642', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:28.325464', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.03325020521879196, 'timestamp': '2025-09-30 22:41:28.330788', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:28.390334', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.13206571340560913, 'timestamp': '2025-09-30 22:41:28.406048', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:28.476254', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.06655163317918777, 'timestamp': '2025-09-30 22:41:28.487010', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:28.544660', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.09473113715648651, 'timestamp': '2025-09-30 22:41:28.550271', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:28.617314', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.09271430969238281, 'timestamp': '2025-09-30 22:41:28.622754', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:28.682173', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.1216743066906929, 'timestamp': '2025-09-30 22:41:28.686353', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:28.744961', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.08525396883487701, 'timestamp': '2025-09-30 22:41:28.752687', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:28.821448', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.07979951053857803, 'timestamp': '2025-09-30 22:41:28.826104', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:28.884684', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.09577173739671707, 'timestamp': '2025-09-30 22:41:28.888483', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:28.952104', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.04866908863186836, 'timestamp': '2025-09-30 22:41:28.969737', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.031522', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.16267849504947662, 'timestamp': '2025-09-30 22:41:29.043904', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:29.100664', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.04780247062444687, 'timestamp': '2025-09-30 22:41:29.106247', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:29.164898', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.09707413613796234, 'timestamp': '2025-09-30 22:41:29.181531', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.245260', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.10653398931026459, 'timestamp': '2025-09-30 22:41:29.249863', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:29.323195', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.17121663689613342, 'timestamp': '2025-09-30 22:41:29.334378', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:29.395463', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.10168029367923737, 'timestamp': '2025-09-30 22:41:29.398728', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:29.459722', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.13767114281654358, 'timestamp': '2025-09-30 22:41:29.466859', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.528406', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.12566232681274414, 'timestamp': '2025-09-30 22:41:29.534460', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.593456', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.05436690151691437, 'timestamp': '2025-09-30 22:41:29.604046', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.662673', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.07832605391740799, 'timestamp': '2025-09-30 22:41:29.667126', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:29.730859', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.14281171560287476, 'timestamp': '2025-09-30 22:41:29.735156', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:29.800430', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.10165616124868393, 'timestamp': '2025-09-30 22:41:29.807190', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.869767', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.033667609095573425, 'timestamp': '2025-09-30 22:41:29.877804', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:29.939404', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.08129244297742844, 'timestamp': '2025-09-30 22:41:29.943071', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:30.001959', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.1570482701063156, 'timestamp': '2025-09-30 22:41:30.005085', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:30.082237', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.11393333971500397, 'timestamp': '2025-09-30 22:41:30.085472', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:30.145734', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.10784860700368881, 'timestamp': '2025-09-30 22:41:30.154010', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.215777', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.09465381503105164, 'timestamp': '2025-09-30 22:41:30.220265', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:30.280650', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.068646639585495, 'timestamp': '2025-09-30 22:41:30.284627', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.379684', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.060901083052158356, 'timestamp': '2025-09-30 22:41:30.387444', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.475378', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.10731163620948792, 'timestamp': '2025-09-30 22:41:30.494262', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:30.562408', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.12774701416492462, 'timestamp': '2025-09-30 22:41:30.567399', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.627600', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.16559582948684692, 'timestamp': '2025-09-30 22:41:30.631214', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.689989', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.1362142562866211, 'timestamp': '2025-09-30 22:41:30.697780', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.758085', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.06190844997763634, 'timestamp': '2025-09-30 22:41:30.765113', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:30.823462', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.10549796372652054, 'timestamp': '2025-09-30 22:41:30.828717', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:30.888484', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.1340138465166092, 'timestamp': '2025-09-30 22:41:30.904670', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:30.965236', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.13146407902240753, 'timestamp': '2025-09-30 22:41:30.968745', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:31.027577', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.10527353733778, 'timestamp': '2025-09-30 22:41:31.034903', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:31.092472', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.06204565614461899, 'timestamp': '2025-09-30 22:41:31.095574', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:31.162082', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.0779157429933548, 'timestamp': '2025-09-30 22:41:31.170688', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:31.228855', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.10302826017141342, 'timestamp': '2025-09-30 22:41:31.234358', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:31.292304', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.14100606739521027, 'timestamp': '2025-09-30 22:41:31.301106', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:31.359120', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.043352603912353516, 'timestamp': '2025-09-30 22:41:31.362668', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:31.420503', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.05030060559511185, 'timestamp': '2025-09-30 22:41:31.423895', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:31.480720', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.03961380198597908, 'timestamp': '2025-09-30 22:41:31.484861', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:31.543057', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.15196919441223145, 'timestamp': '2025-09-30 22:41:31.550975', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:31.609119', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.11863230168819427, 'timestamp': '2025-09-30 22:41:31.613141', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:31.672309', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.052347056567668915, 'timestamp': '2025-09-30 22:41:31.675829', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:31.735976', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.11893108487129211, 'timestamp': '2025-09-30 22:41:31.740328', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:31.798673', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.08491790294647217, 'timestamp': '2025-09-30 22:41:31.820796', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:31.882828', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.0448053777217865, 'timestamp': '2025-09-30 22:41:31.886941', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:31.945206', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.06994494795799255, 'timestamp': '2025-09-30 22:41:31.952184', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:32.014141', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.12168154120445251, 'timestamp': '2025-09-30 22:41:32.017036', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:32.075952', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.04679775983095169, 'timestamp': '2025-09-30 22:41:32.084413', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.141542', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.09931720793247223, 'timestamp': '2025-09-30 22:41:32.147297', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.206567', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.1002250388264656, 'timestamp': '2025-09-30 22:41:32.211808', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:32.270893', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.12383227050304413, 'timestamp': '2025-09-30 22:41:32.274132', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:32.331944', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.09088357537984848, 'timestamp': '2025-09-30 22:41:32.338921', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.395865', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.08784504979848862, 'timestamp': '2025-09-30 22:41:32.398795', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.464162', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.04397394135594368, 'timestamp': '2025-09-30 22:41:32.468400', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:32.525845', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.1963350474834442, 'timestamp': '2025-09-30 22:41:32.530390', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:32.592480', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.1771661341190338, 'timestamp': '2025-09-30 22:41:32.599219', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:32.658023', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.040756478905677795, 'timestamp': '2025-09-30 22:41:32.661493', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:32.721734', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.15059173107147217, 'timestamp': '2025-09-30 22:41:32.725856', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.785484', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.10816989839076996, 'timestamp': '2025-09-30 22:41:32.793211', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.852251', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.08052822947502136, 'timestamp': '2025-09-30 22:41:32.860190', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.916747', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.09859780967235565, 'timestamp': '2025-09-30 22:41:32.921390', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:32.983466', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.09217867255210876, 'timestamp': '2025-09-30 22:41:32.986417', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:33.044438', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.056587837636470795, 'timestamp': '2025-09-30 22:41:33.047091', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:33.104342', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.18096190690994263, 'timestamp': '2025-09-30 22:41:33.111418', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:33.181067', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.06653644144535065, 'timestamp': '2025-09-30 22:41:33.185187', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:33.244968', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.1507297158241272, 'timestamp': '2025-09-30 22:41:33.247267', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:33.315519', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.04417116940021515, 'timestamp': '2025-09-30 22:41:33.319755', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:33.377821', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.0687137320637703, 'timestamp': '2025-09-30 22:41:33.384182', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:33.441242', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.09174094349145889, 'timestamp': '2025-09-30 22:41:33.463982', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:33.552922', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.13876450061798096, 'timestamp': '2025-09-30 22:41:33.571152', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:33.644825', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.1217975988984108, 'timestamp': '2025-09-30 22:41:33.669320', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:33.749072', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.06274775415658951, 'timestamp': '2025-09-30 22:41:33.791645', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:33.869746', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.08922506123781204, 'timestamp': '2025-09-30 22:41:33.900671', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:33.984152', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.0931735709309578, 'timestamp': '2025-09-30 22:41:34.032024', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:34.107303', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.05313269421458244, 'timestamp': '2025-09-30 22:41:34.118472', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:34.197689', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.11697313189506531, 'timestamp': '2025-09-30 22:41:34.232324', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:34.313922', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.1279987096786499, 'timestamp': '2025-09-30 22:41:34.327667', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:34.394753', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.047345858067274094, 'timestamp': '2025-09-30 22:41:34.405368', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:34.479285', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.15521854162216187, 'timestamp': '2025-09-30 22:41:34.490914', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:34.582685', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.12571054697036743, 'timestamp': '2025-09-30 22:41:34.611454', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:34.700676', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.08289244025945663, 'timestamp': '2025-09-30 22:41:34.708806', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:34.775501', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.11464287340641022, 'timestamp': '2025-09-30 22:41:34.784438', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:34.853166', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.07068811357021332, 'timestamp': '2025-09-30 22:41:34.865139', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:34.947173', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.11885272711515427, 'timestamp': '2025-09-30 22:41:34.962882', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.035452', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.03285802900791168, 'timestamp': '2025-09-30 22:41:35.046208', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.157116', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.053768135607242584, 'timestamp': '2025-09-30 22:41:35.178563', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.252760', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.13841977715492249, 'timestamp': '2025-09-30 22:41:35.262122', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:35.345646', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.09760275483131409, 'timestamp': '2025-09-30 22:41:35.354165', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:35.426297', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.07202932238578796, 'timestamp': '2025-09-30 22:41:35.434202', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:35.495656', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.0381588451564312, 'timestamp': '2025-09-30 22:41:35.499471', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.578759', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.13484039902687073, 'timestamp': '2025-09-30 22:41:35.581814', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.639985', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.07686904817819595, 'timestamp': '2025-09-30 22:41:35.648863', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.706208', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.09685927629470825, 'timestamp': '2025-09-30 22:41:35.712259', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.772454', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.058241717517375946, 'timestamp': '2025-09-30 22:41:35.777903', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:35.836509', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.14239482581615448, 'timestamp': '2025-09-30 22:41:35.843520', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:35.902012', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.036186497658491135, 'timestamp': '2025-09-30 22:41:35.909461', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:35.979725', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.0934157595038414, 'timestamp': '2025-09-30 22:41:35.994993', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:36.077974', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.07646963745355606, 'timestamp': '2025-09-30 22:41:36.081208', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:36.141940', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.16425001621246338, 'timestamp': '2025-09-30 22:41:36.145521', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:36.210567', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.058158040046691895, 'timestamp': '2025-09-30 22:41:36.229395', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:36.312038', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.06110382825136185, 'timestamp': '2025-09-30 22:41:36.316547', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:36.382985', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.09480001777410507, 'timestamp': '2025-09-30 22:41:36.387840', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:36.446684', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.09416168183088303, 'timestamp': '2025-09-30 22:41:36.451628', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:36.515452', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.20010976493358612, 'timestamp': '2025-09-30 22:41:36.524146', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:36.589406', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.06732485443353653, 'timestamp': '2025-09-30 22:41:36.594834', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:36.660695', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.08089406043291092, 'timestamp': '2025-09-30 22:41:36.664706', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:36.725705', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.08592724800109863, 'timestamp': '2025-09-30 22:41:36.731529', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:36.792830', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.1317504644393921, 'timestamp': '2025-09-30 22:41:36.802093', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:36.859339', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.12495185434818268, 'timestamp': '2025-09-30 22:41:36.863834', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:36.923863', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.16780634224414825, 'timestamp': '2025-09-30 22:41:36.938923', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:37.004352', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.13630998134613037, 'timestamp': '2025-09-30 22:41:37.007297', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:37.068333', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.10553435236215591, 'timestamp': '2025-09-30 22:41:37.076499', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.136221', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.08671862632036209, 'timestamp': '2025-09-30 22:41:37.140539', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.201339', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.07174620777368546, 'timestamp': '2025-09-30 22:41:37.204319', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:37.264020', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.11315669119358063, 'timestamp': '2025-09-30 22:41:37.269174', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:37.330943', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.07020861655473709, 'timestamp': '2025-09-30 22:41:37.340048', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.400155', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.10466016083955765, 'timestamp': '2025-09-30 22:41:37.405494', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:37.465348', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.06067623943090439, 'timestamp': '2025-09-30 22:41:37.468342', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:37.536109', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.04459705576300621, 'timestamp': '2025-09-30 22:41:37.538972', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.599684', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.10813020914793015, 'timestamp': '2025-09-30 22:41:37.607038', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.677699', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.039775095880031586, 'timestamp': '2025-09-30 22:41:37.681050', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:37.751612', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.1499052345752716, 'timestamp': '2025-09-30 22:41:37.755102', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:37.820773', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.08716240525245667, 'timestamp': '2025-09-30 22:41:37.824096', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:37.887200', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.17386317253112793, 'timestamp': '2025-09-30 22:41:37.894347', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:41:37.953888', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.04502693936228752, 'timestamp': '2025-09-30 22:41:37.958644', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.020667', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.11958757042884827, 'timestamp': '2025-09-30 22:41:38.024359', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.084837', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.09371469914913177, 'timestamp': '2025-09-30 22:41:38.087740', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:38.147463', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.07367266714572906, 'timestamp': '2025-09-30 22:41:38.155057', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.213505', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.14309130609035492, 'timestamp': '2025-09-30 22:41:38.217861', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:38.276674', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.08753164857625961, 'timestamp': '2025-09-30 22:41:38.280513', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:38.349472', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.052793338894844055, 'timestamp': '2025-09-30 22:41:38.353094', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:38.416732', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.08652383089065552, 'timestamp': '2025-09-30 22:41:38.433617', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:38.490815', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.060537561774253845, 'timestamp': '2025-09-30 22:41:38.503080', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:38.561424', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.09907741099596024, 'timestamp': '2025-09-30 22:41:38.574853', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.632475', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.10966238379478455, 'timestamp': '2025-09-30 22:41:38.636024', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:38.694349', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.0764135867357254, 'timestamp': '2025-09-30 22:41:38.702412', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:38.759808', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.08937311917543411, 'timestamp': '2025-09-30 22:41:38.763673', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.822451', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.11070539057254791, 'timestamp': '2025-09-30 22:41:38.826319', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:38.884625', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.13992704451084137, 'timestamp': '2025-09-30 22:41:38.887886', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:38.953810', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.08160998672246933, 'timestamp': '2025-09-30 22:41:38.961210', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:39.028596', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.026087552309036255, 'timestamp': '2025-09-30 22:41:39.033174', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:39.092242', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.11470738798379898, 'timestamp': '2025-09-30 22:41:39.096767', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:39.156412', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.19642609357833862, 'timestamp': '2025-09-30 22:41:39.160650', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:39.220067', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.10895108431577682, 'timestamp': '2025-09-30 22:41:39.227978', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:39.286914', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.11431634426116943, 'timestamp': '2025-09-30 22:41:39.303372', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:39.361764', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.10151231288909912, 'timestamp': '2025-09-30 22:41:39.367753', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:39.429567', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.06667887419462204, 'timestamp': '2025-09-30 22:41:39.445376', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:39.507025', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.13644690811634064, 'timestamp': '2025-09-30 22:41:39.516017', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:39.577020', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.07140123099088669, 'timestamp': '2025-09-30 22:41:39.591557', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:39.651892', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.10512283444404602, 'timestamp': '2025-09-30 22:41:39.655634', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:39.716967', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.14389784634113312, 'timestamp': '2025-09-30 22:41:39.720924', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:39.781377', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.04149176552891731, 'timestamp': '2025-09-30 22:41:39.788121', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:41:39.846895', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.06000648811459541, 'timestamp': '2025-09-30 22:41:39.857749', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:39.923579', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.0762554258108139, 'timestamp': '2025-09-30 22:41:39.927242', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:39.987629', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.1662781834602356, 'timestamp': '2025-09-30 22:41:39.990639', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:40.048654', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.0235802810639143, 'timestamp': '2025-09-30 22:41:40.058582', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:40.121378', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.09337868541479111, 'timestamp': '2025-09-30 22:41:40.125416', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:40.182436', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.07767460495233536, 'timestamp': '2025-09-30 22:41:40.185899', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:40.249939', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.038340795785188675, 'timestamp': '2025-09-30 22:41:40.255048', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:40.319789', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.041951753199100494, 'timestamp': '2025-09-30 22:41:40.329675', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:40.388256', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.08219429105520248, 'timestamp': '2025-09-30 22:41:40.392555', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:40.451054', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.10479328036308289, 'timestamp': '2025-09-30 22:41:40.455045', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:40.515603', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.07663766294717789, 'timestamp': '2025-09-30 22:41:40.520067', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:40.603393', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.08638258278369904, 'timestamp': '2025-09-30 22:41:40.610636', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:40.683631', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.10291556268930435, 'timestamp': '2025-09-30 22:41:40.693289', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:40.757274', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.025453031063079834, 'timestamp': '2025-09-30 22:41:40.760639', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:40.816879', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.16855767369270325, 'timestamp': '2025-09-30 22:41:40.820833', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:40.878303', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.15240180492401123, 'timestamp': '2025-09-30 22:41:40.885919', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:40.943306', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.11425001919269562, 'timestamp': '2025-09-30 22:41:40.952893', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:41.010927', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.11773000657558441, 'timestamp': '2025-09-30 22:41:41.013856', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:41.081051', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.09326914697885513, 'timestamp': '2025-09-30 22:41:41.084845', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:41.144450', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.1034272089600563, 'timestamp': '2025-09-30 22:41:41.152138', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:41.209358', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.05347556993365288, 'timestamp': '2025-09-30 22:41:41.219525', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:41.283778', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.15789039433002472, 'timestamp': '2025-09-30 22:41:41.288751', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:41.359796', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.09504605829715729, 'timestamp': '2025-09-30 22:41:41.362570', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:41.420152', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.04277680069208145, 'timestamp': '2025-09-30 22:41:41.427210', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:41.490959', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.033829525113105774, 'timestamp': '2025-09-30 22:41:41.493822', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:41.551898', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.10412254929542542, 'timestamp': '2025-09-30 22:41:41.563918', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:41.632628', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.08152776211500168, 'timestamp': '2025-09-30 22:41:41.643561', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:41.720808', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.04753397777676582, 'timestamp': '2025-09-30 22:41:41.735062', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:41.792846', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.05686912313103676, 'timestamp': '2025-09-30 22:41:41.796068', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:41.853576', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.07124250382184982, 'timestamp': '2025-09-30 22:41:41.856455', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:41.921395', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.0681154727935791, 'timestamp': '2025-09-30 22:41:41.938256', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.004270', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.05182535946369171, 'timestamp': '2025-09-30 22:41:42.018074', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.085565', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.08238786458969116, 'timestamp': '2025-09-30 22:41:42.088801', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:42.167782', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.10875662416219711, 'timestamp': '2025-09-30 22:41:42.177700', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:42.237076', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.16570022702217102, 'timestamp': '2025-09-30 22:41:42.240763', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:42.298978', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.019336186349391937, 'timestamp': '2025-09-30 22:41:42.305742', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:42.371681', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.1273101568222046, 'timestamp': '2025-09-30 22:41:42.375347', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:42.433156', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.12090712040662766, 'timestamp': '2025-09-30 22:41:42.436206', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:42.497626', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.07472755014896393, 'timestamp': '2025-09-30 22:41:42.500872', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:42.559738', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.05963074788451195, 'timestamp': '2025-09-30 22:41:42.572796', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.636468', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.05968554690480232, 'timestamp': '2025-09-30 22:41:42.645027', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:42.707996', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.16710913181304932, 'timestamp': '2025-09-30 22:41:42.713202', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.773291', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.019833484664559364, 'timestamp': '2025-09-30 22:41:42.782749', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.844125', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.06385695934295654, 'timestamp': '2025-09-30 22:41:42.850627', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:42.906601', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.10594204068183899, 'timestamp': '2025-09-30 22:41:42.909270', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:42.967475', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.1284828931093216, 'timestamp': '2025-09-30 22:41:42.970866', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:43.032172', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.09168406575918198, 'timestamp': '2025-09-30 22:41:43.035304', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:43.103819', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.1154320240020752, 'timestamp': '2025-09-30 22:41:43.110215', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:43.166542', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.09764201194047928, 'timestamp': '2025-09-30 22:41:43.177704', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:43.245539', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.16282200813293457, 'timestamp': '2025-09-30 22:41:43.255254', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:43.323530', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.06612065434455872, 'timestamp': '2025-09-30 22:41:43.326899', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:43.387337', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.1126115620136261, 'timestamp': '2025-09-30 22:41:43.394792', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:43.453142', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.1589232236146927, 'timestamp': '2025-09-30 22:41:43.456193', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:43.530542', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.15058055520057678, 'timestamp': '2025-09-30 22:41:43.534226', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:43.594726', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.07308906316757202, 'timestamp': '2025-09-30 22:41:43.597913', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:43.656086', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.05463986098766327, 'timestamp': '2025-09-30 22:41:43.662438', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-09-30 22:41:44.114431', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:44.183181', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.08458665758371353, 'timestamp': '2025-09-30 22:41:44.185931', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:44.249872', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.07691586017608643, 'timestamp': '2025-09-30 22:41:44.253618', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:44.316059', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.0443146787583828, 'timestamp': '2025-09-30 22:41:44.318812', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:44.375951', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.09929236769676208, 'timestamp': '2025-09-30 22:41:44.382644', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:44.442116', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.18012483417987823, 'timestamp': '2025-09-30 22:41:44.445397', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:44.518297', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.16213390231132507, 'timestamp': '2025-09-30 22:41:44.521409', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:44.577990', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.06041764095425606, 'timestamp': '2025-09-30 22:41:44.586007', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:44.650011', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.09554892778396606, 'timestamp': '2025-09-30 22:41:44.657670', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:44.721320', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.12646275758743286, 'timestamp': '2025-09-30 22:41:44.728465', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:44.795739', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.10847625881433487, 'timestamp': '2025-09-30 22:41:44.798879', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:44.864243', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.046923186630010605, 'timestamp': '2025-09-30 22:41:44.866902', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:44.938277', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.0698612779378891, 'timestamp': '2025-09-30 22:41:44.944808', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:45.022866', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.06267821043729782, 'timestamp': '2025-09-30 22:41:45.026424', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.083720', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.12132930755615234, 'timestamp': '2025-09-30 22:41:45.091241', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:45.163398', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.08332481980323792, 'timestamp': '2025-09-30 22:41:45.166268', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:45.225449', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.06005178019404411, 'timestamp': '2025-09-30 22:41:45.232393', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:45.300745', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.10395361483097076, 'timestamp': '2025-09-30 22:41:45.304151', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:45.374880', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.06856750696897507, 'timestamp': '2025-09-30 22:41:45.384543', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.443336', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.0697086974978447, 'timestamp': '2025-09-30 22:41:45.450094', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:45.507618', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.10120881348848343, 'timestamp': '2025-09-30 22:41:45.516293', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.587267', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.13663016259670258, 'timestamp': '2025-09-30 22:41:45.590439', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.655495', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.03762245178222656, 'timestamp': '2025-09-30 22:41:45.659791', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:45.718808', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.05785415321588516, 'timestamp': '2025-09-30 22:41:45.726753', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:45.788431', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.09603478759527206, 'timestamp': '2025-09-30 22:41:45.795959', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.854335', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.06302736699581146, 'timestamp': '2025-09-30 22:41:45.857876', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:45.916857', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.06169039011001587, 'timestamp': '2025-09-30 22:41:45.922274', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:45.988425', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.1967552900314331, 'timestamp': '2025-09-30 22:41:45.992202', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:46.051615', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.09877507388591766, 'timestamp': '2025-09-30 22:41:46.069305', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:46.130337', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.10523851215839386, 'timestamp': '2025-09-30 22:41:46.136453', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:46.202774', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.11152027547359467, 'timestamp': '2025-09-30 22:41:46.206509', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:46.267816', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.200731560587883, 'timestamp': '2025-09-30 22:41:46.272468', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:46.333235', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.14877615869045258, 'timestamp': '2025-09-30 22:41:46.341468', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:46.409832', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.0874309316277504, 'timestamp': '2025-09-30 22:41:46.413266', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:46.481770', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.16499961912631989, 'timestamp': '2025-09-30 22:41:46.484909', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:46.544772', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.04874537140130997, 'timestamp': '2025-09-30 22:41:46.548986', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:46.609830', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.13646477460861206, 'timestamp': '2025-09-30 22:41:46.617643', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:46.678345', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.08107991516590118, 'timestamp': '2025-09-30 22:41:46.681494', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:46.753536', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.1061885878443718, 'timestamp': '2025-09-30 22:41:46.756877', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:46.816365', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.055366188287734985, 'timestamp': '2025-09-30 22:41:46.821902', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:46.881741', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.12816405296325684, 'timestamp': '2025-09-30 22:41:46.888478', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:46.960199', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.13370758295059204, 'timestamp': '2025-09-30 22:41:46.963916', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:47.034727', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.047483716160058975, 'timestamp': '2025-09-30 22:41:47.037490', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:47.111663', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.09352879226207733, 'timestamp': '2025-09-30 22:41:47.115374', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.183671', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.1469133496284485, 'timestamp': '2025-09-30 22:41:47.190406', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.251731', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.0766778290271759, 'timestamp': '2025-09-30 22:41:47.256870', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:47.314966', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.037093766033649445, 'timestamp': '2025-09-30 22:41:47.319726', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.379622', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.05142148584127426, 'timestamp': '2025-09-30 22:41:47.382261', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:47.444095', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.04905414581298828, 'timestamp': '2025-09-30 22:41:47.450497', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:47.513851', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.1246403381228447, 'timestamp': '2025-09-30 22:41:47.516546', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:47.581994', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.1271841675043106, 'timestamp': '2025-09-30 22:41:47.585095', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:47.642467', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.08791173249483109, 'timestamp': '2025-09-30 22:41:47.645061', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.706116', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.1592225730419159, 'timestamp': '2025-09-30 22:41:47.713281', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.774971', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.13413594663143158, 'timestamp': '2025-09-30 22:41:47.779477', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:47.839728', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.09884317219257355, 'timestamp': '2025-09-30 22:41:47.844737', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:47.903663', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.0656481608748436, 'timestamp': '2025-09-30 22:41:47.907451', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:47.966620', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.14001962542533875, 'timestamp': '2025-09-30 22:41:47.983012', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:41:48.042783', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.10742256790399551, 'timestamp': '2025-09-30 22:41:48.046892', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:48.107476', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.11356496065855026, 'timestamp': '2025-09-30 22:41:48.111391', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:48.169365', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.11614754050970078, 'timestamp': '2025-09-30 22:41:48.172513', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:48.229612', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.1406739503145218, 'timestamp': '2025-09-30 22:41:48.236296', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:48.296459', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.09338806569576263, 'timestamp': '2025-09-30 22:41:48.298618', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:48.357648', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.08768750727176666, 'timestamp': '2025-09-30 22:41:48.360839', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:48.419214', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.07729817926883698, 'timestamp': '2025-09-30 22:41:48.423558', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:48.482327', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.07992776483297348, 'timestamp': '2025-09-30 22:41:48.489999', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:48.556146', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.0461721234023571, 'timestamp': '2025-09-30 22:41:48.563518', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:48.622475', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.10004996508359909, 'timestamp': '2025-09-30 22:41:48.632431', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:48.690980', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.024764660745859146, 'timestamp': '2025-09-30 22:41:48.704843', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:48.763238', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.1625325083732605, 'timestamp': '2025-09-30 22:41:48.770095', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:48.828056', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.1080702617764473, 'timestamp': '2025-09-30 22:41:48.835784', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:48.892473', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.1895189732313156, 'timestamp': '2025-09-30 22:41:48.895080', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:48.957069', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.11142051219940186, 'timestamp': '2025-09-30 22:41:48.959625', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 7040042804608.0}, 'timestamp': '2025-09-30 22:41:49.021848', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.09653232246637344, 'timestamp': '2025-09-30 22:41:49.032948', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:49.091193', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.041929323226213455, 'timestamp': '2025-09-30 22:41:49.093911', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.152374', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.08148667216300964, 'timestamp': '2025-09-30 22:41:49.155251', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.213516', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.049221545457839966, 'timestamp': '2025-09-30 22:41:49.216860', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.275845', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.13322928547859192, 'timestamp': '2025-09-30 22:41:49.282195', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:49.338022', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.1349930316209793, 'timestamp': '2025-09-30 22:41:49.341369', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.403461', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.04654228314757347, 'timestamp': '2025-09-30 22:41:49.406193', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:49.464130', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.04276904836297035, 'timestamp': '2025-09-30 22:41:49.468304', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:49.526966', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.1926494985818863, 'timestamp': '2025-09-30 22:41:49.533966', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.592953', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.09808163344860077, 'timestamp': '2025-09-30 22:41:49.595883', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:49.652808', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.09297537803649902, 'timestamp': '2025-09-30 22:41:49.655630', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.713485', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.1962793916463852, 'timestamp': '2025-09-30 22:41:49.716911', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:49.774174', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.0860578790307045, 'timestamp': '2025-09-30 22:41:49.781011', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:49.839220', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.09308204799890518, 'timestamp': '2025-09-30 22:41:49.842245', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:49.898955', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.11546201258897781, 'timestamp': '2025-09-30 22:41:49.905116', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:49.962658', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.12091046571731567, 'timestamp': '2025-09-30 22:41:49.965630', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:50.022953', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.051181860268116, 'timestamp': '2025-09-30 22:41:50.029114', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:50.086166', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.04863398149609566, 'timestamp': '2025-09-30 22:41:50.088775', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:50.154886', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.11571183800697327, 'timestamp': '2025-09-30 22:41:50.157400', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:50.223813', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.06342944502830505, 'timestamp': '2025-09-30 22:41:50.226040', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:50.282776', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.10998016595840454, 'timestamp': '2025-09-30 22:41:50.292171', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.349951', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.060283564031124115, 'timestamp': '2025-09-30 22:41:50.358024', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:50.416111', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.11164211481809616, 'timestamp': '2025-09-30 22:41:50.418499', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.475222', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.18170477449893951, 'timestamp': '2025-09-30 22:41:50.478059', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:50.536484', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.03170812129974365, 'timestamp': '2025-09-30 22:41:50.542650', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:50.613207', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.09727843850851059, 'timestamp': '2025-09-30 22:41:50.615639', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:50.671797', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.07840054482221603, 'timestamp': '2025-09-30 22:41:50.675633', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.732135', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.10982135683298111, 'timestamp': '2025-09-30 22:41:50.734563', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.792635', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.1557077318429947, 'timestamp': '2025-09-30 22:41:50.799033', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:41:50.855944', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.06706392019987106, 'timestamp': '2025-09-30 22:41:50.858038', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.923175', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.06428587436676025, 'timestamp': '2025-09-30 22:41:50.927279', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:50.984307', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.1374293565750122, 'timestamp': '2025-09-30 22:41:50.987935', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.044818', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.11000034213066101, 'timestamp': '2025-09-30 22:41:51.051218', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:51.109249', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.0914238765835762, 'timestamp': '2025-09-30 22:41:51.114092', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.176336', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.23663219809532166, 'timestamp': '2025-09-30 22:41:51.179337', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:51.236015', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.05405119061470032, 'timestamp': '2025-09-30 22:41:51.240298', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.302211', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.08506184816360474, 'timestamp': '2025-09-30 22:41:51.309330', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:51.371122', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.13162356615066528, 'timestamp': '2025-09-30 22:41:51.373270', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:51.430993', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.13129547238349915, 'timestamp': '2025-09-30 22:41:51.433417', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:51.490081', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.08591855317354202, 'timestamp': '2025-09-30 22:41:51.498235', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:51.559046', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.0674065425992012, 'timestamp': '2025-09-30 22:41:51.565556', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:51.624471', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.06752797216176987, 'timestamp': '2025-09-30 22:41:51.627006', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.683815', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.17020384967327118, 'timestamp': '2025-09-30 22:41:51.686183', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.743462', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.16957871615886688, 'timestamp': '2025-09-30 22:41:51.745754', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.804058', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.0453447662293911, 'timestamp': '2025-09-30 22:41:51.809800', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:41:51.867280', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.032332152128219604, 'timestamp': '2025-09-30 22:41:51.869498', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.928966', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.05822136625647545, 'timestamp': '2025-09-30 22:41:51.931330', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:51.988279', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.05149799957871437, 'timestamp': '2025-09-30 22:41:51.991995', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:52.055072', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.04402521252632141, 'timestamp': '2025-09-30 22:41:52.061495', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:52.119974', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.11589597165584564, 'timestamp': '2025-09-30 22:41:52.122433', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.179823', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.0975448414683342, 'timestamp': '2025-09-30 22:41:52.185300', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:52.255305', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.10190724581480026, 'timestamp': '2025-09-30 22:41:52.259432', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:52.315955', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.10867754369974136, 'timestamp': '2025-09-30 22:41:52.322082', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:52.379339', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.11495352536439896, 'timestamp': '2025-09-30 22:41:52.381742', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.438524', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.08359427005052567, 'timestamp': '2025-09-30 22:41:52.441341', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.498453', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.1772606074810028, 'timestamp': '2025-09-30 22:41:52.503952', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.561994', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.12531651556491852, 'timestamp': '2025-09-30 22:41:52.568055', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:52.636635', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.10567595064640045, 'timestamp': '2025-09-30 22:41:52.639039', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:41:52.698867', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.04228111729025841, 'timestamp': '2025-09-30 22:41:52.702758', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:52.761593', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.10171514004468918, 'timestamp': '2025-09-30 22:41:52.771893', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:52.830401', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.05616719648241997, 'timestamp': '2025-09-30 22:41:52.836528', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.892687', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.13965187966823578, 'timestamp': '2025-09-30 22:41:52.895284', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:52.966526', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.11620917171239853, 'timestamp': '2025-09-30 22:41:52.970264', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:41:53.040758', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.14228400588035583, 'timestamp': '2025-09-30 22:41:53.043228', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:53.103121', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.18125101923942566, 'timestamp': '2025-09-30 22:41:53.109789', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:53.190105', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.06400594115257263, 'timestamp': '2025-09-30 22:41:53.192650', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:41:53.255027', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.06657423079013824, 'timestamp': '2025-09-30 22:41:53.260113', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:41:53.319591', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.11410429328680038, 'timestamp': '2025-09-30 22:41:53.321776', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:41:53.395619', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.08317559957504272, 'timestamp': '2025-09-30 22:41:53.403683', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:42:09.428921', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 9546.598851306393, 'timestamp': '2025-09-30 22:42:09.434305', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:09.508370', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.14023658633232117, 'timestamp': '2025-09-30 22:42:09.511339', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:09.569968', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.044068917632102966, 'timestamp': '2025-09-30 22:42:09.572096', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:09.633079', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.13757532835006714, 'timestamp': '2025-09-30 22:42:09.636295', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:09.694191', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.08899987488985062, 'timestamp': '2025-09-30 22:42:09.700453', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:09.757655', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.09257660806179047, 'timestamp': '2025-09-30 22:42:09.760063', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:09.816685', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.11913283169269562, 'timestamp': '2025-09-30 22:42:09.819244', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:09.878462', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.09513656795024872, 'timestamp': '2025-09-30 22:42:09.880772', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:09.939560', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.06674523651599884, 'timestamp': '2025-09-30 22:42:09.945672', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:10.002397', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.023792413994669914, 'timestamp': '2025-09-30 22:42:10.004875', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:10.061755', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.14586365222930908, 'timestamp': '2025-09-30 22:42:10.065423', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:10.127026', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.07760902494192123, 'timestamp': '2025-09-30 22:42:10.131211', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:10.188663', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.24975065886974335, 'timestamp': '2025-09-30 22:42:10.194733', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:10.252928', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.13619905710220337, 'timestamp': '2025-09-30 22:42:10.255196', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:10.311978', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.09804826229810715, 'timestamp': '2025-09-30 22:42:10.314569', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:10.371343', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.05973919481039047, 'timestamp': '2025-09-30 22:42:10.374201', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:10.432051', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.10234153270721436, 'timestamp': '2025-09-30 22:42:10.437955', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:10.495195', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.10699818283319473, 'timestamp': '2025-09-30 22:42:10.497457', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:10.555166', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.010841293260455132, 'timestamp': '2025-09-30 22:42:10.557734', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:10.621723', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.08079413324594498, 'timestamp': '2025-09-30 22:42:10.624378', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:10.697335', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.10568096488714218, 'timestamp': '2025-09-30 22:42:10.704843', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:10.763679', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.10103902965784073, 'timestamp': '2025-09-30 22:42:10.765969', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:10.824435', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.058340124785900116, 'timestamp': '2025-09-30 22:42:10.826608', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:10.885788', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.11983590573072433, 'timestamp': '2025-09-30 22:42:10.889207', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:10.947408', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.12793003022670746, 'timestamp': '2025-09-30 22:42:10.953291', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:11.013142', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.09608223289251328, 'timestamp': '2025-09-30 22:42:11.015442', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:11.081891', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.08635896444320679, 'timestamp': '2025-09-30 22:42:11.084733', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:11.143621', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.09599620848894119, 'timestamp': '2025-09-30 22:42:11.146199', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:11.205188', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.09388716518878937, 'timestamp': '2025-09-30 22:42:11.211221', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:11.267000', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.1244831383228302, 'timestamp': '2025-09-30 22:42:11.269208', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:11.330178', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.076820008456707, 'timestamp': '2025-09-30 22:42:11.332736', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:11.398236', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.10415701568126678, 'timestamp': '2025-09-30 22:42:11.400538', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:11.458188', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.032371219247579575, 'timestamp': '2025-09-30 22:42:11.464496', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:11.527240', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.10198485851287842, 'timestamp': '2025-09-30 22:42:11.529566', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:11.585828', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.07686762511730194, 'timestamp': '2025-09-30 22:42:11.588209', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:11.647274', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.048215270042419434, 'timestamp': '2025-09-30 22:42:11.650239', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:11.709181', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.08156796544790268, 'timestamp': '2025-09-30 22:42:11.715100', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:11.785142', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.08285541832447052, 'timestamp': '2025-09-30 22:42:11.787610', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:42:11.872123', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.15460766851902008, 'timestamp': '2025-09-30 22:42:11.874485', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:11.968086', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.045423828065395355, 'timestamp': '2025-09-30 22:42:11.970470', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:12.028916', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.0456681065261364, 'timestamp': '2025-09-30 22:42:12.034838', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:12.095011', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.057703837752342224, 'timestamp': '2025-09-30 22:42:12.097968', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:12.154599', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.0885111466050148, 'timestamp': '2025-09-30 22:42:12.157280', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:12.220920', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.06793951988220215, 'timestamp': '2025-09-30 22:42:12.223855', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:12.281199', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.06609697639942169, 'timestamp': '2025-09-30 22:42:12.287303', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:12.346664', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.03887704387307167, 'timestamp': '2025-09-30 22:42:12.348960', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:12.407535', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.048782944679260254, 'timestamp': '2025-09-30 22:42:12.410517', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:12.469929', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.09007854759693146, 'timestamp': '2025-09-30 22:42:12.473591', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:12.530498', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.10064645856618881, 'timestamp': '2025-09-30 22:42:12.536482', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:12.592070', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.10581441968679428, 'timestamp': '2025-09-30 22:42:12.594545', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:12.651685', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.06707727164030075, 'timestamp': '2025-09-30 22:42:12.653992', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:12.711171', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.06563753634691238, 'timestamp': '2025-09-30 22:42:12.713655', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:12.771570', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.11948919296264648, 'timestamp': '2025-09-30 22:42:12.777789', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:12.868394', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.06587252020835876, 'timestamp': '2025-09-30 22:42:12.871125', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:12.944901', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.16751019656658173, 'timestamp': '2025-09-30 22:42:12.947889', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.024468', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.04581901803612709, 'timestamp': '2025-09-30 22:42:13.026644', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:13.086855', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.03054816462099552, 'timestamp': '2025-09-30 22:42:13.096392', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:13.163433', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.07890873402357101, 'timestamp': '2025-09-30 22:42:13.167401', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.241895', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.085203155875206, 'timestamp': '2025-09-30 22:42:13.244001', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:13.319046', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.060641102492809296, 'timestamp': '2025-09-30 22:42:13.321965', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:13.404015', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.09364542365074158, 'timestamp': '2025-09-30 22:42:13.410534', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:13.486991', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.08685735613107681, 'timestamp': '2025-09-30 22:42:13.489402', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:13.565299', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.04837680235505104, 'timestamp': '2025-09-30 22:42:13.568736', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.667906', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.08740299195051193, 'timestamp': '2025-09-30 22:42:13.671066', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.743808', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.06600864976644516, 'timestamp': '2025-09-30 22:42:13.749812', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:13.810793', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.14841896295547485, 'timestamp': '2025-09-30 22:42:13.814540', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.872645', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.03632092848420143, 'timestamp': '2025-09-30 22:42:13.875007', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:13.961380', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.08495457470417023, 'timestamp': '2025-09-30 22:42:13.965931', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:42:14.049324', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.13390608131885529, 'timestamp': '2025-09-30 22:42:14.056971', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:14.138607', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.11210091412067413, 'timestamp': '2025-09-30 22:42:14.141348', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:14.220283', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.09430524706840515, 'timestamp': '2025-09-30 22:42:14.222811', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:14.294858', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.13235202431678772, 'timestamp': '2025-09-30 22:42:14.298172', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:14.373675', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.05476744472980499, 'timestamp': '2025-09-30 22:42:14.380527', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:14.463597', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.1348334401845932, 'timestamp': '2025-09-30 22:42:14.466953', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:14.524982', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.15671373903751373, 'timestamp': '2025-09-30 22:42:14.527572', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:14.585534', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.1112324595451355, 'timestamp': '2025-09-30 22:42:14.588829', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:14.659351', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.08722302317619324, 'timestamp': '2025-09-30 22:42:14.666062', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:14.729795', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.01466821413487196, 'timestamp': '2025-09-30 22:42:14.732772', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:14.789600', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.062446266412734985, 'timestamp': '2025-09-30 22:42:14.792086', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:14.849388', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.13259907066822052, 'timestamp': '2025-09-30 22:42:14.851913', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:14.922752', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.03208300843834877, 'timestamp': '2025-09-30 22:42:14.928643', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:14.986911', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.06609971821308136, 'timestamp': '2025-09-30 22:42:14.989877', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.051909', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.057256776839494705, 'timestamp': '2025-09-30 22:42:15.054565', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.111960', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.07164826989173889, 'timestamp': '2025-09-30 22:42:15.116298', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:15.175563', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.14282995462417603, 'timestamp': '2025-09-30 22:42:15.181517', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:15.239285', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.03719813749194145, 'timestamp': '2025-09-30 22:42:15.241749', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:15.300035', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.05129324644804001, 'timestamp': '2025-09-30 22:42:15.302491', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:15.359420', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.08150730282068253, 'timestamp': '2025-09-30 22:42:15.361863', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:15.418592', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.012271121144294739, 'timestamp': '2025-09-30 22:42:15.424571', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.481900', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.16414882242679596, 'timestamp': '2025-09-30 22:42:15.484293', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:15.542170', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.08520209044218063, 'timestamp': '2025-09-30 22:42:15.544478', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:15.603304', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.06117544323205948, 'timestamp': '2025-09-30 22:42:15.606016', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.665755', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.13399305939674377, 'timestamp': '2025-09-30 22:42:15.672083', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:15.734082', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.11111672222614288, 'timestamp': '2025-09-30 22:42:15.736689', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.795178', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.027924101799726486, 'timestamp': '2025-09-30 22:42:15.797911', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:15.884359', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.052268777042627335, 'timestamp': '2025-09-30 22:42:15.886555', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:15.950845', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.10816274583339691, 'timestamp': '2025-09-30 22:42:15.958937', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:16.018603', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.07223886251449585, 'timestamp': '2025-09-30 22:42:16.021152', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:16.082744', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.016624998301267624, 'timestamp': '2025-09-30 22:42:16.085023', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:16.142100', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.10879245400428772, 'timestamp': '2025-09-30 22:42:16.145158', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.203803', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.10746133327484131, 'timestamp': '2025-09-30 22:42:16.211384', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.267991', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.058288659900426865, 'timestamp': '2025-09-30 22:42:16.275228', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.332549', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.13717658817768097, 'timestamp': '2025-09-30 22:42:16.335136', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.392587', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.03623759374022484, 'timestamp': '2025-09-30 22:42:16.394881', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.464273', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.11551541090011597, 'timestamp': '2025-09-30 22:42:16.471891', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:16.528403', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.060338959097862244, 'timestamp': '2025-09-30 22:42:16.530979', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:16.587652', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.1667533963918686, 'timestamp': '2025-09-30 22:42:16.590468', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:16.651395', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.05319136008620262, 'timestamp': '2025-09-30 22:42:16.653984', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:16.713556', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.1418299674987793, 'timestamp': '2025-09-30 22:42:16.719423', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:16.776323', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.07577978819608688, 'timestamp': '2025-09-30 22:42:16.778559', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:16.835228', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.09253587573766708, 'timestamp': '2025-09-30 22:42:16.837640', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:16.895456', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.05577715113759041, 'timestamp': '2025-09-30 22:42:16.898400', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:16.956481', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.11276189237833023, 'timestamp': '2025-09-30 22:42:16.962672', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.028381', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.15205839276313782, 'timestamp': '2025-09-30 22:42:17.030854', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.102983', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.12619337439537048, 'timestamp': '2025-09-30 22:42:17.105491', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:17.168755', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.11774126440286636, 'timestamp': '2025-09-30 22:42:17.171412', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:17.233423', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.10518081486225128, 'timestamp': '2025-09-30 22:42:17.243297', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.305019', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.10057935118675232, 'timestamp': '2025-09-30 22:42:17.307574', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:17.367523', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.05327455326914787, 'timestamp': '2025-09-30 22:42:17.370018', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.455587', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.08547456562519073, 'timestamp': '2025-09-30 22:42:17.458020', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.516745', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.13864439725875854, 'timestamp': '2025-09-30 22:42:17.522714', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:17.579225', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.15560761094093323, 'timestamp': '2025-09-30 22:42:17.581759', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:17.641529', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.0635375827550888, 'timestamp': '2025-09-30 22:42:17.643972', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.702740', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.08026564866304398, 'timestamp': '2025-09-30 22:42:17.710375', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:17.767351', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.14094533026218414, 'timestamp': '2025-09-30 22:42:17.774303', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:17.832141', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.1617935448884964, 'timestamp': '2025-09-30 22:42:17.834412', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:17.891556', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.07212740182876587, 'timestamp': '2025-09-30 22:42:17.893951', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:17.963745', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.1633973866701126, 'timestamp': '2025-09-30 22:42:17.966171', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.024299', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.1450733095407486, 'timestamp': '2025-09-30 22:42:18.031441', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.090648', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.10876703262329102, 'timestamp': '2025-09-30 22:42:18.094311', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.152592', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.10216246545314789, 'timestamp': '2025-09-30 22:42:18.155546', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:18.221977', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.08685273677110672, 'timestamp': '2025-09-30 22:42:18.224287', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.293618', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.04660467803478241, 'timestamp': '2025-09-30 22:42:18.299794', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:18.356526', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.1228153333067894, 'timestamp': '2025-09-30 22:42:18.359057', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:18.416756', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.0714842900633812, 'timestamp': '2025-09-30 22:42:18.419300', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:18.478437', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.10530948638916016, 'timestamp': '2025-09-30 22:42:18.481323', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.544616', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.14726725220680237, 'timestamp': '2025-09-30 22:42:18.550563', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.606148', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.12258818000555038, 'timestamp': '2025-09-30 22:42:18.609180', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.666098', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.09476345777511597, 'timestamp': '2025-09-30 22:42:18.668321', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:18.725494', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.05204647034406662, 'timestamp': '2025-09-30 22:42:18.727939', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:18.784986', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.05817453935742378, 'timestamp': '2025-09-30 22:42:18.791054', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.847107', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.14393045008182526, 'timestamp': '2025-09-30 22:42:18.849514', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:18.906432', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.1057216227054596, 'timestamp': '2025-09-30 22:42:18.908857', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:18.974893', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.12447162717580795, 'timestamp': '2025-09-30 22:42:18.977057', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:19.034748', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.03721478208899498, 'timestamp': '2025-09-30 22:42:19.040763', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:19.096600', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.17375361919403076, 'timestamp': '2025-09-30 22:42:19.100218', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:19.161161', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.16303153336048126, 'timestamp': '2025-09-30 22:42:19.165506', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:19.224600', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.04109834507107735, 'timestamp': '2025-09-30 22:42:19.226948', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:19.288403', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.03787447512149811, 'timestamp': '2025-09-30 22:42:19.294811', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:19.366213', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.054853908717632294, 'timestamp': '2025-09-30 22:42:19.368893', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:19.426424', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.10067310929298401, 'timestamp': '2025-09-30 22:42:19.428904', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:19.488432', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.040293436497449875, 'timestamp': '2025-09-30 22:42:19.491266', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:19.548363', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.18235963582992554, 'timestamp': '2025-09-30 22:42:19.554694', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:19.610616', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.13639439642429352, 'timestamp': '2025-09-30 22:42:19.612681', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:19.684043', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.08828154951334, 'timestamp': '2025-09-30 22:42:19.696766', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:19.754038', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.047241613268852234, 'timestamp': '2025-09-30 22:42:19.756902', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:19.813962', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.024592231959104538, 'timestamp': '2025-09-30 22:42:19.819945', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:19.887655', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.03408424183726311, 'timestamp': '2025-09-30 22:42:19.891652', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:19.948899', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.058103226125240326, 'timestamp': '2025-09-30 22:42:19.952072', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:20.008611', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.15108118951320648, 'timestamp': '2025-09-30 22:42:20.018637', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:20.085017', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.05649254098534584, 'timestamp': '2025-09-30 22:42:20.091089', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:20.146687', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.10933533310890198, 'timestamp': '2025-09-30 22:42:20.148998', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.221886', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.07284098118543625, 'timestamp': '2025-09-30 22:42:20.224240', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:20.298864', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.04286050796508789, 'timestamp': '2025-09-30 22:42:20.302560', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:20.364741', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.14468225836753845, 'timestamp': '2025-09-30 22:42:20.371268', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.428481', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.11975021660327911, 'timestamp': '2025-09-30 22:42:20.430596', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.487912', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.06520452350378036, 'timestamp': '2025-09-30 22:42:20.490280', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:20.547517', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.07043981552124023, 'timestamp': '2025-09-30 22:42:20.549890', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.606461', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.09711121022701263, 'timestamp': '2025-09-30 22:42:20.613300', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:20.671314', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.13750818371772766, 'timestamp': '2025-09-30 22:42:20.673654', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:20.733516', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.09878865629434586, 'timestamp': '2025-09-30 22:42:20.735979', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.797087', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.0874323770403862, 'timestamp': '2025-09-30 22:42:20.799478', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:20.860686', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.10448446869850159, 'timestamp': '2025-09-30 22:42:20.867441', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:20.925901', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.02851567231118679, 'timestamp': '2025-09-30 22:42:20.928145', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:20.987835', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.0892723798751831, 'timestamp': '2025-09-30 22:42:20.990319', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:21.047741', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.09994792938232422, 'timestamp': '2025-09-30 22:42:21.050043', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:21.107708', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.08108288049697876, 'timestamp': '2025-09-30 22:42:21.114415', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:21.175513', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.08541088551282883, 'timestamp': '2025-09-30 22:42:21.178801', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:21.237374', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.07820044457912445, 'timestamp': '2025-09-30 22:42:21.239885', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:21.319614', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.06747707724571228, 'timestamp': '2025-09-30 22:42:21.324277', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:21.389160', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.1250244528055191, 'timestamp': '2025-09-30 22:42:21.395831', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:21.451381', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.14028947055339813, 'timestamp': '2025-09-30 22:42:21.453818', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:21.523299', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.040532130748033524, 'timestamp': '2025-09-30 22:42:21.526787', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:21.588194', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.08342085778713226, 'timestamp': '2025-09-30 22:42:21.592934', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:21.650439', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.058430008590221405, 'timestamp': '2025-09-30 22:42:21.656945', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:21.714684', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.053566884249448776, 'timestamp': '2025-09-30 22:42:21.718365', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:21.775460', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.0729590430855751, 'timestamp': '2025-09-30 22:42:21.778563', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:21.840848', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.07445443421602249, 'timestamp': '2025-09-30 22:42:21.843126', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:21.900397', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.03150514140725136, 'timestamp': '2025-09-30 22:42:21.906498', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:21.965087', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.1638108789920807, 'timestamp': '2025-09-30 22:42:21.972577', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.031958', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.10607974231243134, 'timestamp': '2025-09-30 22:42:22.035141', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.092624', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.08343435823917389, 'timestamp': '2025-09-30 22:42:22.095086', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:22.152963', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.08452852070331573, 'timestamp': '2025-09-30 22:42:22.160228', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.220874', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.09190626442432404, 'timestamp': '2025-09-30 22:42:22.223819', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:22.282224', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.06815319508314133, 'timestamp': '2025-09-30 22:42:22.285458', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.344648', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.09404170513153076, 'timestamp': '2025-09-30 22:42:22.347255', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:22.408873', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.024217411875724792, 'timestamp': '2025-09-30 22:42:22.415719', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:22.471771', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.06698990613222122, 'timestamp': '2025-09-30 22:42:22.475375', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:22.535170', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.14390307664871216, 'timestamp': '2025-09-30 22:42:22.537815', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.629233', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.19963599741458893, 'timestamp': '2025-09-30 22:42:22.632449', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:22.691467', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.09314223378896713, 'timestamp': '2025-09-30 22:42:22.698027', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:22.755714', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.06402259320020676, 'timestamp': '2025-09-30 22:42:22.759306', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:22.816291', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.1082700788974762, 'timestamp': '2025-09-30 22:42:22.819165', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:22.877633', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.11182032525539398, 'timestamp': '2025-09-30 22:42:22.879858', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:22.937668', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.09425748139619827, 'timestamp': '2025-09-30 22:42:22.943631', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.003660', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.05216126888990402, 'timestamp': '2025-09-30 22:42:23.006048', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.066291', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.0974314734339714, 'timestamp': '2025-09-30 22:42:23.068731', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.127621', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.03313402831554413, 'timestamp': '2025-09-30 22:42:23.130890', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.199761', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.05741015821695328, 'timestamp': '2025-09-30 22:42:23.205894', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.278926', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.08177395164966583, 'timestamp': '2025-09-30 22:42:23.281218', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:23.339082', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.08689936995506287, 'timestamp': '2025-09-30 22:42:23.341276', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.410537', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.13526514172554016, 'timestamp': '2025-09-30 22:42:23.412814', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.473678', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.10410898178815842, 'timestamp': '2025-09-30 22:42:23.479700', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:23.538024', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.08512476831674576, 'timestamp': '2025-09-30 22:42:23.540264', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.597238', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.07955000549554825, 'timestamp': '2025-09-30 22:42:23.599640', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.657022', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.11669732630252838, 'timestamp': '2025-09-30 22:42:23.659367', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.736550', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.04132098704576492, 'timestamp': '2025-09-30 22:42:23.742380', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:23.803362', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.1322011947631836, 'timestamp': '2025-09-30 22:42:23.805763', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:23.875137', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.10225524753332138, 'timestamp': '2025-09-30 22:42:23.877632', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:23.946800', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.09545983374118805, 'timestamp': '2025-09-30 22:42:23.948968', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:24.006913', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.0735197588801384, 'timestamp': '2025-09-30 22:42:24.013265', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:24.082327', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.1400071680545807, 'timestamp': '2025-09-30 22:42:24.084834', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.141480', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.14974400401115417, 'timestamp': '2025-09-30 22:42:24.144055', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:24.200743', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.12265526503324509, 'timestamp': '2025-09-30 22:42:24.202915', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.260048', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.08119446039199829, 'timestamp': '2025-09-30 22:42:24.266497', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:24.324912', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.10832218080759048, 'timestamp': '2025-09-30 22:42:24.327266', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:24.386538', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.12968604266643524, 'timestamp': '2025-09-30 22:42:24.389005', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:24.447188', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.10319600999355316, 'timestamp': '2025-09-30 22:42:24.449674', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.507025', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.10638529807329178, 'timestamp': '2025-09-30 22:42:24.512902', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:24.577924', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.09155072271823883, 'timestamp': '2025-09-30 22:42:24.580163', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.639183', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.050171706825494766, 'timestamp': '2025-09-30 22:42:24.642811', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.708387', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.17930221557617188, 'timestamp': '2025-09-30 22:42:24.711110', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:24.800193', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.07078807055950165, 'timestamp': '2025-09-30 22:42:24.809239', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:24.875010', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.11271435767412186, 'timestamp': '2025-09-30 22:42:24.878102', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:24.943556', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.021701758727431297, 'timestamp': '2025-09-30 22:42:24.946068', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:25.003508', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.055155202746391296, 'timestamp': '2025-09-30 22:42:25.005807', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:25.068478', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.05408299341797829, 'timestamp': '2025-09-30 22:42:25.074686', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:25.132033', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.08777943253517151, 'timestamp': '2025-09-30 22:42:25.134291', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.204261', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.14103294909000397, 'timestamp': '2025-09-30 22:42:25.206559', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:25.265378', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.09416228532791138, 'timestamp': '2025-09-30 22:42:25.267739', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.324456', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.03700951486825943, 'timestamp': '2025-09-30 22:42:25.330290', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.386563', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.09630994498729706, 'timestamp': '2025-09-30 22:42:25.388734', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:25.446395', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.08997810631990433, 'timestamp': '2025-09-30 22:42:25.448748', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:25.505531', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.11893579363822937, 'timestamp': '2025-09-30 22:42:25.507908', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:25.567216', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.07974854856729507, 'timestamp': '2025-09-30 22:42:25.573363', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.630772', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.06777480989694595, 'timestamp': '2025-09-30 22:42:25.632928', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.696220', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.1599978506565094, 'timestamp': '2025-09-30 22:42:25.698786', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:25.757121', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.05447859689593315, 'timestamp': '2025-09-30 22:42:25.759770', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:25.817287', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.06636075675487518, 'timestamp': '2025-09-30 22:42:25.822992', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:25.881158', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.16070707142353058, 'timestamp': '2025-09-30 22:42:25.883759', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:25.942920', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.16017229855060577, 'timestamp': '2025-09-30 22:42:25.946388', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:26.003619', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.07815021276473999, 'timestamp': '2025-09-30 22:42:26.007446', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:26.078119', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.08926340192556381, 'timestamp': '2025-09-30 22:42:26.084030', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:26.141334', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.10068067163228989, 'timestamp': '2025-09-30 22:42:26.143778', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:26.202613', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.07051755487918854, 'timestamp': '2025-09-30 22:42:26.204926', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:26.264053', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.07038358598947525, 'timestamp': '2025-09-30 22:42:26.266403', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:26.326161', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.08184263855218887, 'timestamp': '2025-09-30 22:42:26.332620', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:26.391525', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.10962879657745361, 'timestamp': '2025-09-30 22:42:26.394238', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:26.452584', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.08015704154968262, 'timestamp': '2025-09-30 22:42:26.455017', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:26.530464', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.11256946623325348, 'timestamp': '2025-09-30 22:42:26.532832', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:26.597223', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.06943394988775253, 'timestamp': '2025-09-30 22:42:26.603151', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:26.659415', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.10201622545719147, 'timestamp': '2025-09-30 22:42:26.662371', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:42:26.736054', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08246950805187225, 'timestamp': '2025-09-30 22:42:26.738358', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:26.797182', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.07613439857959747, 'timestamp': '2025-09-30 22:42:26.800337', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:26.858361', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.05495643988251686, 'timestamp': '2025-09-30 22:42:26.864661', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:26.929664', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.08397563546895981, 'timestamp': '2025-09-30 22:42:26.932091', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:27.010270', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.03471124544739723, 'timestamp': '2025-09-30 22:42:27.012756', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:27.070637', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.0812547579407692, 'timestamp': '2025-09-30 22:42:27.073055', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:27.129443', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.058852434158325195, 'timestamp': '2025-09-30 22:42:27.136754', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:27.196519', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.0956626757979393, 'timestamp': '2025-09-30 22:42:27.199191', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:42:27.256151', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.07102080434560776, 'timestamp': '2025-09-30 22:42:27.259008', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:27.317502', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.09050142765045166, 'timestamp': '2025-09-30 22:42:27.319825', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:27.381845', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.049098871648311615, 'timestamp': '2025-09-30 22:42:27.387583', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:27.444910', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.03135216608643532, 'timestamp': '2025-09-30 22:42:27.447398', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:27.504104', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.06439866125583649, 'timestamp': '2025-09-30 22:42:27.506603', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:27.564002', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.06018361449241638, 'timestamp': '2025-09-30 22:42:27.566493', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:27.638540', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.13451571762561798, 'timestamp': '2025-09-30 22:42:27.644499', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:27.701573', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.05166088789701462, 'timestamp': '2025-09-30 22:42:27.704004', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:27.762141', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.176752507686615, 'timestamp': '2025-09-30 22:42:27.764868', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:27.822599', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.11602359265089035, 'timestamp': '2025-09-30 22:42:27.824915', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:27.895654', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.1816919893026352, 'timestamp': '2025-09-30 22:42:27.901498', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:27.964635', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.07654035836458206, 'timestamp': '2025-09-30 22:42:27.974998', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:28.032105', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.0441075824201107, 'timestamp': '2025-09-30 22:42:28.034559', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:28.092117', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.10029908269643784, 'timestamp': '2025-09-30 22:42:28.094258', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:28.150960', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.05877191945910454, 'timestamp': '2025-09-30 22:42:28.157009', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:28.213766', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.14846065640449524, 'timestamp': '2025-09-30 22:42:28.216158', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:28.274843', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.17772215604782104, 'timestamp': '2025-09-30 22:42:28.277291', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:28.334108', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.11738143116235733, 'timestamp': '2025-09-30 22:42:28.336911', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:28.394281', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.08349204808473587, 'timestamp': '2025-09-30 22:42:28.400301', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:28.458205', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.1708173304796219, 'timestamp': '2025-09-30 22:42:28.468218', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:28.525536', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.07374193519353867, 'timestamp': '2025-09-30 22:42:28.529302', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:28.587270', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.06549573689699173, 'timestamp': '2025-09-30 22:42:28.591877', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:28.649582', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.06372047960758209, 'timestamp': '2025-09-30 22:42:28.655769', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:28.712096', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.18137328326702118, 'timestamp': '2025-09-30 22:42:28.714419', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:28.770678', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.07189883291721344, 'timestamp': '2025-09-30 22:42:28.773356', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:28.831513', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.09362009167671204, 'timestamp': '2025-09-30 22:42:28.833961', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:28.893186', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.12738652527332306, 'timestamp': '2025-09-30 22:42:28.899134', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:28.960856', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.06068887561559677, 'timestamp': '2025-09-30 22:42:28.963186', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:29.018931', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.12813426554203033, 'timestamp': '2025-09-30 22:42:29.021142', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.078390', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.09186936914920807, 'timestamp': '2025-09-30 22:42:29.080788', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.137736', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.03679777681827545, 'timestamp': '2025-09-30 22:42:29.143607', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:29.201055', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.09903042763471603, 'timestamp': '2025-09-30 22:42:29.205100', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:29.262715', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.21281935274600983, 'timestamp': '2025-09-30 22:42:29.265438', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.325461', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.07113724946975708, 'timestamp': '2025-09-30 22:42:29.328079', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:29.384918', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.11812412738800049, 'timestamp': '2025-09-30 22:42:29.392354', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.449506', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.04530850052833557, 'timestamp': '2025-09-30 22:42:29.452498', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:29.529906', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.06992454081773758, 'timestamp': '2025-09-30 22:42:29.545447', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.633596', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.03923619166016579, 'timestamp': '2025-09-30 22:42:29.636870', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:29.696302', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.05918144807219505, 'timestamp': '2025-09-30 22:42:29.702683', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:29.760307', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.09594441950321198, 'timestamp': '2025-09-30 22:42:29.763500', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:29.820632', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.09362922608852386, 'timestamp': '2025-09-30 22:42:29.823267', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:29.882141', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.054990123957395554, 'timestamp': '2025-09-30 22:42:29.884774', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:29.945218', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.09409964084625244, 'timestamp': '2025-09-30 22:42:29.951253', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:30.008484', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.11739115417003632, 'timestamp': '2025-09-30 22:42:30.011250', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:30.069648', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.06565312296152115, 'timestamp': '2025-09-30 22:42:30.072688', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.131615', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.09073340892791748, 'timestamp': '2025-09-30 22:42:30.134431', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:30.191658', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.11641769111156464, 'timestamp': '2025-09-30 22:42:30.197736', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.256086', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.0945432037115097, 'timestamp': '2025-09-30 22:42:30.259066', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.315973', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.056467439979314804, 'timestamp': '2025-09-30 22:42:30.318705', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.377283', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.061338845640420914, 'timestamp': '2025-09-30 22:42:30.380224', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:30.437530', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.04327351972460747, 'timestamp': '2025-09-30 22:42:30.443841', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.517745', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.10880153626203537, 'timestamp': '2025-09-30 22:42:30.520306', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.580561', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.10629655420780182, 'timestamp': '2025-09-30 22:42:30.582816', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.640246', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.12715335190296173, 'timestamp': '2025-09-30 22:42:30.642396', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:30.700663', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.05875098332762718, 'timestamp': '2025-09-30 22:42:30.706554', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:30.763158', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.09143975377082825, 'timestamp': '2025-09-30 22:42:30.765573', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:30.822495', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.02389948070049286, 'timestamp': '2025-09-30 22:42:30.824918', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:30.893541', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.04749166965484619, 'timestamp': '2025-09-30 22:42:30.895506', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:30.956269', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.14190375804901123, 'timestamp': '2025-09-30 22:42:30.962479', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.028893', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.04845094308257103, 'timestamp': '2025-09-30 22:42:31.031256', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.100302', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.14865782856941223, 'timestamp': '2025-09-30 22:42:31.102920', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.185338', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.06335475295782089, 'timestamp': '2025-09-30 22:42:31.187643', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:31.253315', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.11525746434926987, 'timestamp': '2025-09-30 22:42:31.259326', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:31.317623', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.04918249696493149, 'timestamp': '2025-09-30 22:42:31.319921', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.378089', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.08696994185447693, 'timestamp': '2025-09-30 22:42:31.380530', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:31.439187', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.09486806392669678, 'timestamp': '2025-09-30 22:42:31.441677', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.501740', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.057711564004421234, 'timestamp': '2025-09-30 22:42:31.508341', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.565577', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.12413618713617325, 'timestamp': '2025-09-30 22:42:31.567777', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.624382', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.10764793306589127, 'timestamp': '2025-09-30 22:42:31.627320', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:31.685680', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.11422225087881088, 'timestamp': '2025-09-30 22:42:31.688049', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.745358', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.15932781994342804, 'timestamp': '2025-09-30 22:42:31.751382', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:31.810910', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.12672433257102966, 'timestamp': '2025-09-30 22:42:31.813397', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:31.870358', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.1647375375032425, 'timestamp': '2025-09-30 22:42:31.872856', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:31.931382', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.11714628338813782, 'timestamp': '2025-09-30 22:42:31.933867', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:31.991782', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.1348101794719696, 'timestamp': '2025-09-30 22:42:31.997686', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.058383', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.0752003863453865, 'timestamp': '2025-09-30 22:42:32.060708', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.123958', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.09357238560914993, 'timestamp': '2025-09-30 22:42:32.126185', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:32.188712', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.07637651264667511, 'timestamp': '2025-09-30 22:42:32.191068', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.250839', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.06325050443410873, 'timestamp': '2025-09-30 22:42:32.257408', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:32.316940', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.12164628505706787, 'timestamp': '2025-09-30 22:42:32.319470', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:32.377903', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.07908868044614792, 'timestamp': '2025-09-30 22:42:32.382017', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.440370', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.07754337787628174, 'timestamp': '2025-09-30 22:42:32.442754', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.520021', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.09151759743690491, 'timestamp': '2025-09-30 22:42:32.526209', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.581965', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.09375151991844177, 'timestamp': '2025-09-30 22:42:32.584370', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:32.653478', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.04210519418120384, 'timestamp': '2025-09-30 22:42:32.656118', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:32.715905', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.09298019111156464, 'timestamp': '2025-09-30 22:42:32.718397', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:32.775765', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.13721196353435516, 'timestamp': '2025-09-30 22:42:32.781905', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:32.840634', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.13467326760292053, 'timestamp': '2025-09-30 22:42:32.844480', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.914934', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.05247114598751068, 'timestamp': '2025-09-30 22:42:32.919436', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:32.976963', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.054214440286159515, 'timestamp': '2025-09-30 22:42:32.979073', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:33.045300', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.13942167162895203, 'timestamp': '2025-09-30 22:42:33.051473', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-09-30 22:42:33.674351', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:33.734628', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.06929989904165268, 'timestamp': '2025-09-30 22:42:33.736898', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:33.794742', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.10415956377983093, 'timestamp': '2025-09-30 22:42:33.797289', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:33.856401', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.05126097798347473, 'timestamp': '2025-09-30 22:42:33.859547', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:33.922998', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.08999792486429214, 'timestamp': '2025-09-30 22:42:33.928958', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:33.994601', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.12171539664268494, 'timestamp': '2025-09-30 22:42:33.996799', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.054468', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.08430614322423935, 'timestamp': '2025-09-30 22:42:34.056863', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:34.114172', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.1374143809080124, 'timestamp': '2025-09-30 22:42:34.116476', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:34.183728', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.0995776504278183, 'timestamp': '2025-09-30 22:42:34.189677', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.246913', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.09859295189380646, 'timestamp': '2025-09-30 22:42:34.249183', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:34.316950', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.025226375088095665, 'timestamp': '2025-09-30 22:42:34.319272', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:34.376406', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.027103934437036514, 'timestamp': '2025-09-30 22:42:34.392582', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:34.450220', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.030556511133909225, 'timestamp': '2025-09-30 22:42:34.456744', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:34.513365', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.07039397209882736, 'timestamp': '2025-09-30 22:42:34.521803', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.595575', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.05959613248705864, 'timestamp': '2025-09-30 22:42:34.597815', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:34.666901', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.0714016705751419, 'timestamp': '2025-09-30 22:42:34.669398', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.733711', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.114665187895298, 'timestamp': '2025-09-30 22:42:34.739809', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.798117', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.06610206514596939, 'timestamp': '2025-09-30 22:42:34.800792', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:34.862129', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.08004192262887955, 'timestamp': '2025-09-30 22:42:34.864743', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.922562', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.10587603598833084, 'timestamp': '2025-09-30 22:42:34.925143', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:34.993038', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.038309842348098755, 'timestamp': '2025-09-30 22:42:34.999086', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:35.069271', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.053574882447719574, 'timestamp': '2025-09-30 22:42:35.072016', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:35.129675', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.06904188543558121, 'timestamp': '2025-09-30 22:42:35.132424', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:35.189666', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.09236016124486923, 'timestamp': '2025-09-30 22:42:35.193244', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:35.264538', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.05005413293838501, 'timestamp': '2025-09-30 22:42:35.270590', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:35.328309', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.0665452778339386, 'timestamp': '2025-09-30 22:42:35.330633', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:35.387726', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.08948405086994171, 'timestamp': '2025-09-30 22:42:35.392474', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:35.453300', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.11265016347169876, 'timestamp': '2025-09-30 22:42:35.455773', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:35.513698', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.07828857749700546, 'timestamp': '2025-09-30 22:42:35.519859', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:35.579208', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.06709186732769012, 'timestamp': '2025-09-30 22:42:35.581909', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:35.640213', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.07124065607786179, 'timestamp': '2025-09-30 22:42:35.644111', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:35.702908', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.1510952115058899, 'timestamp': '2025-09-30 22:42:35.706211', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:35.763892', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.05259883776307106, 'timestamp': '2025-09-30 22:42:35.772412', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:35.833273', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.06481558084487915, 'timestamp': '2025-09-30 22:42:35.836030', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:35.894133', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.06293462216854095, 'timestamp': '2025-09-30 22:42:35.898745', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:35.965848', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.09208593517541885, 'timestamp': '2025-09-30 22:42:35.968538', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:36.029392', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.01244006585329771, 'timestamp': '2025-09-30 22:42:36.036484', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:36.106841', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.11447101086378098, 'timestamp': '2025-09-30 22:42:36.111639', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:36.169635', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.04555879905819893, 'timestamp': '2025-09-30 22:42:36.172308', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.239481', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.10562007874250412, 'timestamp': '2025-09-30 22:42:36.242347', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.300064', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.02528980001807213, 'timestamp': '2025-09-30 22:42:36.306017', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.363057', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.0851636677980423, 'timestamp': '2025-09-30 22:42:36.365970', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:36.425727', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.06015992909669876, 'timestamp': '2025-09-30 22:42:36.428991', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.489252', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.062228403985500336, 'timestamp': '2025-09-30 22:42:36.492304', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.550521', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.09332861751317978, 'timestamp': '2025-09-30 22:42:36.556883', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:36.613471', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.08176414668560028, 'timestamp': '2025-09-30 22:42:36.619031', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:36.679690', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.09023961424827576, 'timestamp': '2025-09-30 22:42:36.683175', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:36.739746', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.0812031552195549, 'timestamp': '2025-09-30 22:42:36.742930', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:36.803280', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.051771461963653564, 'timestamp': '2025-09-30 22:42:36.812808', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:36.868296', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.027274614199995995, 'timestamp': '2025-09-30 22:42:36.871498', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:36.931297', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.14969050884246826, 'timestamp': '2025-09-30 22:42:36.934422', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.007456', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.1059509664773941, 'timestamp': '2025-09-30 22:42:37.010240', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:37.068808', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.06366528570652008, 'timestamp': '2025-09-30 22:42:37.074888', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:37.132987', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.09089607000350952, 'timestamp': '2025-09-30 22:42:37.135682', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:37.192958', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.11184544116258621, 'timestamp': '2025-09-30 22:42:37.195476', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.254742', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.05758880823850632, 'timestamp': '2025-09-30 22:42:37.257549', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.313743', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.11757665127515793, 'timestamp': '2025-09-30 22:42:37.319739', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.375690', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.10614312440156937, 'timestamp': '2025-09-30 22:42:37.378192', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:37.438663', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.16933511197566986, 'timestamp': '2025-09-30 22:42:37.441244', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:37.498956', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.15557590126991272, 'timestamp': '2025-09-30 22:42:37.502074', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.560204', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.052618641406297684, 'timestamp': '2025-09-30 22:42:37.566814', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:37.634346', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.02057693526148796, 'timestamp': '2025-09-30 22:42:37.636993', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.694634', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.04222477599978447, 'timestamp': '2025-09-30 22:42:37.697379', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:37.755606', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.07327070832252502, 'timestamp': '2025-09-30 22:42:37.757570', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.817063', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.04612686485052109, 'timestamp': '2025-09-30 22:42:37.822897', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:37.880198', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.12205003947019577, 'timestamp': '2025-09-30 22:42:37.885574', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:37.945055', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.07373562455177307, 'timestamp': '2025-09-30 22:42:37.947566', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:38.005942', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.03217137232422829, 'timestamp': '2025-09-30 22:42:38.008730', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.066800', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.050255827605724335, 'timestamp': '2025-09-30 22:42:38.073234', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:38.132020', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.08808062970638275, 'timestamp': '2025-09-30 22:42:38.135112', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:38.194684', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.0818350538611412, 'timestamp': '2025-09-30 22:42:38.197079', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:38.255592', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.09653200209140778, 'timestamp': '2025-09-30 22:42:38.257856', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:38.316004', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.09159903973340988, 'timestamp': '2025-09-30 22:42:38.322427', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:38.396027', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.10524305701255798, 'timestamp': '2025-09-30 22:42:38.398885', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:38.457040', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.09653438627719879, 'timestamp': '2025-09-30 22:42:38.459631', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.518568', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.0357586145401001, 'timestamp': '2025-09-30 22:42:38.521619', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.587479', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.03718392550945282, 'timestamp': '2025-09-30 22:42:38.594250', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:38.653100', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.15086060762405396, 'timestamp': '2025-09-30 22:42:38.658011', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:38.726558', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.08975576609373093, 'timestamp': '2025-09-30 22:42:38.728798', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.790577', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.07701218128204346, 'timestamp': '2025-09-30 22:42:38.793179', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.869339', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.09165753424167633, 'timestamp': '2025-09-30 22:42:38.875476', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:38.932290', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.04371387138962746, 'timestamp': '2025-09-30 22:42:38.938977', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:39.012017', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.12071716785430908, 'timestamp': '2025-09-30 22:42:39.015337', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.072936', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.12446117401123047, 'timestamp': '2025-09-30 22:42:39.076106', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:39.132742', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.028580350801348686, 'timestamp': '2025-09-30 22:42:39.139595', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.209168', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.07653771340847015, 'timestamp': '2025-09-30 22:42:39.211906', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.269995', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.09349102526903152, 'timestamp': '2025-09-30 22:42:39.273512', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.335047', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.046039026230573654, 'timestamp': '2025-09-30 22:42:39.337757', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.396787', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.019450638443231583, 'timestamp': '2025-09-30 22:42:39.402907', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:39.473548', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.0425013042986393, 'timestamp': '2025-09-30 22:42:39.477665', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:39.550537', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.07182992249727249, 'timestamp': '2025-09-30 22:42:39.553673', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:39.614217', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.002402748679742217, 'timestamp': '2025-09-30 22:42:39.616920', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:39.675954', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.057387575507164, 'timestamp': '2025-09-30 22:42:39.684484', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:39.746056', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.07307596504688263, 'timestamp': '2025-09-30 22:42:39.748554', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:39.808302', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.01975458860397339, 'timestamp': '2025-09-30 22:42:39.811023', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:39.869908', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.040724191814661026, 'timestamp': '2025-09-30 22:42:39.875780', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:39.934557', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.032347701489925385, 'timestamp': '2025-09-30 22:42:39.943685', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:40.005384', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.08654085546731949, 'timestamp': '2025-09-30 22:42:40.016326', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:40.085934', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.07270053774118423, 'timestamp': '2025-09-30 22:42:40.091465', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:40.157165', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.11364364624023438, 'timestamp': '2025-09-30 22:42:40.161531', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:40.225861', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.06607073545455933, 'timestamp': '2025-09-30 22:42:40.233421', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:40.295373', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.03358812630176544, 'timestamp': '2025-09-30 22:42:40.298427', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:40.357355', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.061137855052948, 'timestamp': '2025-09-30 22:42:40.359744', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:40.430337', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.1581166833639145, 'timestamp': '2025-09-30 22:42:40.435303', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:40.501601', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.07714627683162689, 'timestamp': '2025-09-30 22:42:40.510211', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:40.578678', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.09980528056621552, 'timestamp': '2025-09-30 22:42:40.582635', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:40.642690', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.05580073595046997, 'timestamp': '2025-09-30 22:42:40.645074', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:42:56.709782', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 7503.194033846198, 'timestamp': '2025-09-30 22:42:56.714499', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:56.774894', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.040859442204236984, 'timestamp': '2025-09-30 22:42:56.793792', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:56.872962', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.04468327388167381, 'timestamp': '2025-09-30 22:42:56.895232', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:56.974215', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.021794140338897705, 'timestamp': '2025-09-30 22:42:56.980520', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:57.063816', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.10972905904054642, 'timestamp': '2025-09-30 22:42:57.073330', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.134623', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.07224579900503159, 'timestamp': '2025-09-30 22:42:57.147904', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.208030', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.09927636384963989, 'timestamp': '2025-09-30 22:42:57.233922', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:57.301522', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.07551884651184082, 'timestamp': '2025-09-30 22:42:57.311387', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:42:57.391441', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.13788768649101257, 'timestamp': '2025-09-30 22:42:57.401053', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:57.470835', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.04321708902716637, 'timestamp': '2025-09-30 22:42:57.482165', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:57.545774', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.06883818656206131, 'timestamp': '2025-09-30 22:42:57.575817', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.637855', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.043963685631752014, 'timestamp': '2025-09-30 22:42:57.651678', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.722589', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.06725683063268661, 'timestamp': '2025-09-30 22:42:57.753453', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:57.828532', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.07297766953706741, 'timestamp': '2025-09-30 22:42:57.838407', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.908252', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.043920788913965225, 'timestamp': '2025-09-30 22:42:57.921322', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:57.986388', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.08221506327390671, 'timestamp': '2025-09-30 22:42:57.993859', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.056454', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.1577274054288864, 'timestamp': '2025-09-30 22:42:58.064741', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.127323', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.0746704712510109, 'timestamp': '2025-09-30 22:42:58.129940', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:58.188789', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.15452603995800018, 'timestamp': '2025-09-30 22:42:58.195602', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.254963', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.077395498752594, 'timestamp': '2025-09-30 22:42:58.257871', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.329151', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.09099330753087997, 'timestamp': '2025-09-30 22:42:58.332698', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.391703', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.04977960139513016, 'timestamp': '2025-09-30 22:42:58.395123', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.457837', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.05339660868048668, 'timestamp': '2025-09-30 22:42:58.464769', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.523634', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.12366706877946854, 'timestamp': '2025-09-30 22:42:58.526020', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.585730', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.10573754459619522, 'timestamp': '2025-09-30 22:42:58.588185', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.646740', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.0370921716094017, 'timestamp': '2025-09-30 22:42:58.649340', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.715057', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.21718133985996246, 'timestamp': '2025-09-30 22:42:58.724427', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.788335', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.039438243955373764, 'timestamp': '2025-09-30 22:42:58.796365', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.855710', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.03536819666624069, 'timestamp': '2025-09-30 22:42:58.858956', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:58.928562', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.08701754361391068, 'timestamp': '2025-09-30 22:42:58.931762', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:58.999602', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.11216039955615997, 'timestamp': '2025-09-30 22:42:59.005689', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:59.062794', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.11386171728372574, 'timestamp': '2025-09-30 22:42:59.065244', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.135638', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.11419913172721863, 'timestamp': '2025-09-30 22:42:59.137985', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:59.199660', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.11951039731502533, 'timestamp': '2025-09-30 22:42:59.202397', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:42:59.259828', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.09291079640388489, 'timestamp': '2025-09-30 22:42:59.270978', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.329338', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.05384065583348274, 'timestamp': '2025-09-30 22:42:59.335865', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:59.398181', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.051820121705532074, 'timestamp': '2025-09-30 22:42:59.404306', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.465399', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.12606796622276306, 'timestamp': '2025-09-30 22:42:59.472085', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.543641', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.09187570214271545, 'timestamp': '2025-09-30 22:42:59.553299', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.625186', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.06654318422079086, 'timestamp': '2025-09-30 22:42:59.632801', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:59.701069', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.09166335314512253, 'timestamp': '2025-09-30 22:42:59.708746', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:42:59.767471', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.1569109410047531, 'timestamp': '2025-09-30 22:42:59.776338', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:42:59.839078', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.11182926595211029, 'timestamp': '2025-09-30 22:42:59.845632', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:42:59.914961', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.06170089915394783, 'timestamp': '2025-09-30 22:42:59.917938', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:42:59.976266', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.05324138328433037, 'timestamp': '2025-09-30 22:42:59.979031', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:00.037673', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.027133788913488388, 'timestamp': '2025-09-30 22:43:00.040260', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:00.109654', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.09187690168619156, 'timestamp': '2025-09-30 22:43:00.115966', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.177981', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.09819810092449188, 'timestamp': '2025-09-30 22:43:00.182057', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:00.241737', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.11364762485027313, 'timestamp': '2025-09-30 22:43:00.247978', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:00.310198', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.14816465973854065, 'timestamp': '2025-09-30 22:43:00.318865', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.382646', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.14862078428268433, 'timestamp': '2025-09-30 22:43:00.396769', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.459656', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.07672852277755737, 'timestamp': '2025-09-30 22:43:00.462544', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.525270', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.08799386024475098, 'timestamp': '2025-09-30 22:43:00.536099', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.594657', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.04931578412652016, 'timestamp': '2025-09-30 22:43:00.598051', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.667751', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.007263487204909325, 'timestamp': '2025-09-30 22:43:00.674700', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:00.744972', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.03352097421884537, 'timestamp': '2025-09-30 22:43:00.747777', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:00.809626', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.04955446720123291, 'timestamp': '2025-09-30 22:43:00.813499', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:00.876435', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.10551386326551437, 'timestamp': '2025-09-30 22:43:00.879390', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:00.936124', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.12112012505531311, 'timestamp': '2025-09-30 22:43:00.948293', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.006118', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.06600353866815567, 'timestamp': '2025-09-30 22:43:01.008629', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.069086', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.08239957690238953, 'timestamp': '2025-09-30 22:43:01.074505', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.144649', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.172319233417511, 'timestamp': '2025-09-30 22:43:01.147328', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:01.205904', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.1027374342083931, 'timestamp': '2025-09-30 22:43:01.212623', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.273217', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.1326843500137329, 'timestamp': '2025-09-30 22:43:01.280750', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:01.345242', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.14662738144397736, 'timestamp': '2025-09-30 22:43:01.351391', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.409792', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.11776050925254822, 'timestamp': '2025-09-30 22:43:01.412726', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:01.473627', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.07104719430208206, 'timestamp': '2025-09-30 22:43:01.480023', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:01.539716', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.0780157521367073, 'timestamp': '2025-09-30 22:43:01.547996', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:01.612520', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.12264789640903473, 'timestamp': '2025-09-30 22:43:01.619883', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:01.690405', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.11649445444345474, 'timestamp': '2025-09-30 22:43:01.692739', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:43:01.750596', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.07038526237010956, 'timestamp': '2025-09-30 22:43:01.766967', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:01.827605', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.11403889954090118, 'timestamp': '2025-09-30 22:43:01.830634', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:01.889456', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.1893152892589569, 'timestamp': '2025-09-30 22:43:01.893226', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:01.951380', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.07989222556352615, 'timestamp': '2025-09-30 22:43:01.954907', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:02.015328', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.07778467237949371, 'timestamp': '2025-09-30 22:43:02.022007', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:02.085295', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.14791283011436462, 'timestamp': '2025-09-30 22:43:02.088085', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.146892', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.058320678770542145, 'timestamp': '2025-09-30 22:43:02.152517', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:02.217384', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.09967764467000961, 'timestamp': '2025-09-30 22:43:02.220091', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.287378', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.02165813371539116, 'timestamp': '2025-09-30 22:43:02.293763', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.354246', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.06476499885320663, 'timestamp': '2025-09-30 22:43:02.358430', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:02.417932', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.14641810953617096, 'timestamp': '2025-09-30 22:43:02.421573', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:02.480616', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.046237602829933167, 'timestamp': '2025-09-30 22:43:02.483647', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:02.545624', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.10268063098192215, 'timestamp': '2025-09-30 22:43:02.552223', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:02.627288', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.1530856192111969, 'timestamp': '2025-09-30 22:43:02.631427', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.691510', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.05258588865399361, 'timestamp': '2025-09-30 22:43:02.694505', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:02.752666', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.06116533279418945, 'timestamp': '2025-09-30 22:43:02.755299', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.824113', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.10760772228240967, 'timestamp': '2025-09-30 22:43:02.830508', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:02.908600', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.09411242604255676, 'timestamp': '2025-09-30 22:43:02.911855', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:02.969065', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.12828540802001953, 'timestamp': '2025-09-30 22:43:02.971980', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:03.035257', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.09948903322219849, 'timestamp': '2025-09-30 22:43:03.043899', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:03.107995', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.060253262519836426, 'timestamp': '2025-09-30 22:43:03.120542', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:03.178857', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.07239755243062973, 'timestamp': '2025-09-30 22:43:03.181923', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:03.240280', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.027942923828959465, 'timestamp': '2025-09-30 22:43:03.243963', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:03.314164', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.08637932687997818, 'timestamp': '2025-09-30 22:43:03.317603', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:03.379898', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.15160401165485382, 'timestamp': '2025-09-30 22:43:03.386322', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:03.444216', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.08952073752880096, 'timestamp': '2025-09-30 22:43:03.451348', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:03.516270', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.026653317734599113, 'timestamp': '2025-09-30 22:43:03.518972', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:03.591976', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.08857721090316772, 'timestamp': '2025-09-30 22:43:03.594633', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:03.655920', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.10313548147678375, 'timestamp': '2025-09-30 22:43:03.671472', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:03.741998', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.08846481889486313, 'timestamp': '2025-09-30 22:43:03.745055', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:03.815819', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.10514921694993973, 'timestamp': '2025-09-30 22:43:03.818358', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:03.875506', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.14225302636623383, 'timestamp': '2025-09-30 22:43:03.877979', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:03.936280', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.08928754180669785, 'timestamp': '2025-09-30 22:43:03.944508', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:04.006817', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.0772317424416542, 'timestamp': '2025-09-30 22:43:04.010648', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.069249', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.15389610826969147, 'timestamp': '2025-09-30 22:43:04.071765', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:04.135596', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.07498442381620407, 'timestamp': '2025-09-30 22:43:04.144628', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.206329', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.04695853963494301, 'timestamp': '2025-09-30 22:43:04.219428', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:04.277650', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.1945813000202179, 'timestamp': '2025-09-30 22:43:04.281106', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:04.340126', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.05019961670041084, 'timestamp': '2025-09-30 22:43:04.343625', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:04.411568', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.08158446848392487, 'timestamp': '2025-09-30 22:43:04.414438', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:04.473500', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.08496791124343872, 'timestamp': '2025-09-30 22:43:04.484631', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.548422', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.10879936814308167, 'timestamp': '2025-09-30 22:43:04.552432', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:04.628166', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.06819060444831848, 'timestamp': '2025-09-30 22:43:04.632769', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.691580', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.18339097499847412, 'timestamp': '2025-09-30 22:43:04.695508', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.754804', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.12573601305484772, 'timestamp': '2025-09-30 22:43:04.761822', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:43:04.821090', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.13152439892292023, 'timestamp': '2025-09-30 22:43:04.824148', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:04.888748', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.07837202399969101, 'timestamp': '2025-09-30 22:43:04.892639', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:04.950161', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.08148596435785294, 'timestamp': '2025-09-30 22:43:04.953079', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.010829', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.07261046022176743, 'timestamp': '2025-09-30 22:43:05.017028', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.073758', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.09313776344060898, 'timestamp': '2025-09-30 22:43:05.077639', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:05.136146', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.03233462944626808, 'timestamp': '2025-09-30 22:43:05.139748', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:05.210231', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.06298629939556122, 'timestamp': '2025-09-30 22:43:05.214128', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:05.273667', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.12040466070175171, 'timestamp': '2025-09-30 22:43:05.285214', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.347216', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.1049288958311081, 'timestamp': '2025-09-30 22:43:05.358069', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:05.418503', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.15888477861881256, 'timestamp': '2025-09-30 22:43:05.420998', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:05.487592', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.02444036491215229, 'timestamp': '2025-09-30 22:43:05.491496', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.555730', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.09617313742637634, 'timestamp': '2025-09-30 22:43:05.562470', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.619583', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.060253042727708817, 'timestamp': '2025-09-30 22:43:05.622883', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.681415', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.12134517729282379, 'timestamp': '2025-09-30 22:43:05.684504', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:05.743132', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.1336170732975006, 'timestamp': '2025-09-30 22:43:05.746042', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:05.802517', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.07503841072320938, 'timestamp': '2025-09-30 22:43:05.809460', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:05.868747', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.16035884618759155, 'timestamp': '2025-09-30 22:43:05.871680', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:05.930186', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.04381875693798065, 'timestamp': '2025-09-30 22:43:05.932563', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:05.991559', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.07102645188570023, 'timestamp': '2025-09-30 22:43:05.999604', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:06.058160', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.09453669935464859, 'timestamp': '2025-09-30 22:43:06.065458', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:06.134894', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.07591646164655685, 'timestamp': '2025-09-30 22:43:06.143903', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:06.209149', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.10660763084888458, 'timestamp': '2025-09-30 22:43:06.213302', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.272877', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.12129898369312286, 'timestamp': '2025-09-30 22:43:06.276512', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:06.341549', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.14706996083259583, 'timestamp': '2025-09-30 22:43:06.354687', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:06.412532', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.11006532609462738, 'timestamp': '2025-09-30 22:43:06.416178', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.472780', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.07690063863992691, 'timestamp': '2025-09-30 22:43:06.476320', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:06.534047', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.10714539140462875, 'timestamp': '2025-09-30 22:43:06.538468', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.596111', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.06775306165218353, 'timestamp': '2025-09-30 22:43:06.602694', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.666184', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.13948296010494232, 'timestamp': '2025-09-30 22:43:06.669795', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:06.737548', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.1671639084815979, 'timestamp': '2025-09-30 22:43:06.740772', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:06.806882', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.12836571037769318, 'timestamp': '2025-09-30 22:43:06.809531', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.878968', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.051733940839767456, 'timestamp': '2025-09-30 22:43:06.885968', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:06.944341', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.08116421848535538, 'timestamp': '2025-09-30 22:43:06.946712', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:07.004969', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.042892202734947205, 'timestamp': '2025-09-30 22:43:07.007901', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.074322', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.06629784405231476, 'timestamp': '2025-09-30 22:43:07.085496', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:07.152631', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.1282743513584137, 'timestamp': '2025-09-30 22:43:07.159755', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:07.217535', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.08461536467075348, 'timestamp': '2025-09-30 22:43:07.228860', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.292371', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.06226959824562073, 'timestamp': '2025-09-30 22:43:07.301457', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.366805', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.11635902523994446, 'timestamp': '2025-09-30 22:43:07.376028', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.439715', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.09284418076276779, 'timestamp': '2025-09-30 22:43:07.445990', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:07.502141', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.07120022177696228, 'timestamp': '2025-09-30 22:43:07.505481', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:07.562184', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.09294667094945908, 'timestamp': '2025-09-30 22:43:07.565046', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.628645', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.08666757494211197, 'timestamp': '2025-09-30 22:43:07.631268', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:07.688083', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.11901917308568954, 'timestamp': '2025-09-30 22:43:07.694854', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:07.758109', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.044411513954401016, 'timestamp': '2025-09-30 22:43:07.760907', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:07.825769', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.1269882470369339, 'timestamp': '2025-09-30 22:43:07.833164', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:07.897872', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.1054501160979271, 'timestamp': '2025-09-30 22:43:07.900916', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:07.960629', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.11943916976451874, 'timestamp': '2025-09-30 22:43:07.967066', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:08.022671', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.07610949873924255, 'timestamp': '2025-09-30 22:43:08.029932', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:08.096060', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.14734166860580444, 'timestamp': '2025-09-30 22:43:08.100171', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:08.167926', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.04739518463611603, 'timestamp': '2025-09-30 22:43:08.171529', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:08.232641', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.08971463888883591, 'timestamp': '2025-09-30 22:43:08.246879', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:08.320066', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.08130079507827759, 'timestamp': '2025-09-30 22:43:08.324880', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:08.392329', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.08509128540754318, 'timestamp': '2025-09-30 22:43:08.395801', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:08.464554', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.14405737817287445, 'timestamp': '2025-09-30 22:43:08.467656', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:08.527838', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.03047407604753971, 'timestamp': '2025-09-30 22:43:08.534568', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:08.602189', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.15598784387111664, 'timestamp': '2025-09-30 22:43:08.605993', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:08.675356', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.11802022904157639, 'timestamp': '2025-09-30 22:43:08.678787', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:08.750916', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.16086967289447784, 'timestamp': '2025-09-30 22:43:08.754549', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:08.814258', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.024094238877296448, 'timestamp': '2025-09-30 22:43:08.821817', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:08.886502', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.05691526457667351, 'timestamp': '2025-09-30 22:43:08.889976', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:08.946635', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.13235065340995789, 'timestamp': '2025-09-30 22:43:08.950493', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:09.007171', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.03601979836821556, 'timestamp': '2025-09-30 22:43:09.011221', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:09.069376', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.09675512462854385, 'timestamp': '2025-09-30 22:43:09.076808', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:09.142677', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.14645561575889587, 'timestamp': '2025-09-30 22:43:09.147366', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:09.219526', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.11058490723371506, 'timestamp': '2025-09-30 22:43:09.223147', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:09.282426', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.0694449171423912, 'timestamp': '2025-09-30 22:43:09.285745', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:09.342839', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.1292383372783661, 'timestamp': '2025-09-30 22:43:09.349495', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:09.406799', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.08824336528778076, 'timestamp': '2025-09-30 22:43:09.411067', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:09.468369', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.2190859615802765, 'timestamp': '2025-09-30 22:43:09.476386', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:09.538497', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.07592816650867462, 'timestamp': '2025-09-30 22:43:09.543371', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:09.614185', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.15858806669712067, 'timestamp': '2025-09-30 22:43:09.621267', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:09.679283', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.07090157270431519, 'timestamp': '2025-09-30 22:43:09.682848', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:09.751575', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.170600026845932, 'timestamp': '2025-09-30 22:43:09.755147', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:09.813920', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.1348758190870285, 'timestamp': '2025-09-30 22:43:09.817901', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:09.877984', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.06236831843852997, 'timestamp': '2025-09-30 22:43:09.891535', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:09.952983', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.04577478766441345, 'timestamp': '2025-09-30 22:43:09.956532', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.018636', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.06806636601686478, 'timestamp': '2025-09-30 22:43:10.022492', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:10.097374', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.14455342292785645, 'timestamp': '2025-09-30 22:43:10.101562', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:10.169916', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.09922007471323013, 'timestamp': '2025-09-30 22:43:10.178912', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:10.238367', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.1380271315574646, 'timestamp': '2025-09-30 22:43:10.241051', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.300386', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.1581190675497055, 'timestamp': '2025-09-30 22:43:10.307445', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.373831', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.07436433434486389, 'timestamp': '2025-09-30 22:43:10.376990', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.443151', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.035335689783096313, 'timestamp': '2025-09-30 22:43:10.450127', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:10.508233', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.12421128898859024, 'timestamp': '2025-09-30 22:43:10.511209', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.569666', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.1474403589963913, 'timestamp': '2025-09-30 22:43:10.573755', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:10.638854', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.08867450803518295, 'timestamp': '2025-09-30 22:43:10.642827', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:10.701507', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.031751155853271484, 'timestamp': '2025-09-30 22:43:10.716779', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:10.777330', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.12082548439502716, 'timestamp': '2025-09-30 22:43:10.781640', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:10.846351', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.05554192513227463, 'timestamp': '2025-09-30 22:43:10.849085', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:10.911956', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.09406261146068573, 'timestamp': '2025-09-30 22:43:10.915239', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:10.973179', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.12899935245513916, 'timestamp': '2025-09-30 22:43:10.980526', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.041518', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.03688693419098854, 'timestamp': '2025-09-30 22:43:11.045400', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:11.104886', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.13050834834575653, 'timestamp': '2025-09-30 22:43:11.108872', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.167481', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.0844431146979332, 'timestamp': '2025-09-30 22:43:11.170204', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.232147', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.14711561799049377, 'timestamp': '2025-09-30 22:43:11.239496', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:11.299075', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.07335766404867172, 'timestamp': '2025-09-30 22:43:11.302289', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.364762', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.08962497115135193, 'timestamp': '2025-09-30 22:43:11.367163', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:11.431855', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.09194226562976837, 'timestamp': '2025-09-30 22:43:11.434241', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.492242', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.12794041633605957, 'timestamp': '2025-09-30 22:43:11.502991', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.561081', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.10806979984045029, 'timestamp': '2025-09-30 22:43:11.568394', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:11.631736', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.21728186309337616, 'timestamp': '2025-09-30 22:43:11.634693', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:11.692197', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.04059131443500519, 'timestamp': '2025-09-30 22:43:11.696905', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.754342', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.07784567028284073, 'timestamp': '2025-09-30 22:43:11.761259', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:11.825598', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.057391487061977386, 'timestamp': '2025-09-30 22:43:11.829667', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:11.895816', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.14209087193012238, 'timestamp': '2025-09-30 22:43:11.898596', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:11.959959', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.04647596925497055, 'timestamp': '2025-09-30 22:43:11.963997', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:12.030527', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.11216799169778824, 'timestamp': '2025-09-30 22:43:12.041504', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:12.102476', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.0684942975640297, 'timestamp': '2025-09-30 22:43:12.105949', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:12.166839', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.058703433722257614, 'timestamp': '2025-09-30 22:43:12.169402', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:12.233270', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.14888258278369904, 'timestamp': '2025-09-30 22:43:12.235949', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:12.299232', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.08232075721025467, 'timestamp': '2025-09-30 22:43:12.307003', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:12.376655', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.11658787727355957, 'timestamp': '2025-09-30 22:43:12.379527', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.458023', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.15069888532161713, 'timestamp': '2025-09-30 22:43:12.460678', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:12.525384', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.013803776353597641, 'timestamp': '2025-09-30 22:43:12.528263', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.606027', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.10257941484451294, 'timestamp': '2025-09-30 22:43:12.614165', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.684252', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.060298774391412735, 'timestamp': '2025-09-30 22:43:12.687414', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.756907', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.09978491812944412, 'timestamp': '2025-09-30 22:43:12.763310', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:12.835834', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.08711740374565125, 'timestamp': '2025-09-30 22:43:12.839345', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.906844', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.1392841637134552, 'timestamp': '2025-09-30 22:43:12.918717', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:12.988445', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.10277774930000305, 'timestamp': '2025-09-30 22:43:12.992267', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:13.058722', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.11576498299837112, 'timestamp': '2025-09-30 22:43:13.063076', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:13.130101', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.1284920871257782, 'timestamp': '2025-09-30 22:43:13.136816', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:13.201917', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.12609358131885529, 'timestamp': '2025-09-30 22:43:13.210803', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:13.275017', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.08783093094825745, 'timestamp': '2025-09-30 22:43:13.277983', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:13.351470', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.1264304667711258, 'timestamp': '2025-09-30 22:43:13.360605', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:13.426370', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.08172064274549484, 'timestamp': '2025-09-30 22:43:13.431041', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:13.497169', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.04374518617987633, 'timestamp': '2025-09-30 22:43:13.507867', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:13.578067', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.1438588947057724, 'timestamp': '2025-09-30 22:43:13.584820', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:13.660085', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.06900244951248169, 'timestamp': '2025-09-30 22:43:13.665794', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:13.734630', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.06396271288394928, 'timestamp': '2025-09-30 22:43:13.737776', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:13.818099', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.13408444821834564, 'timestamp': '2025-09-30 22:43:13.826850', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:13.897854', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.0805891901254654, 'timestamp': '2025-09-30 22:43:13.900810', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:13.973092', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.1212342232465744, 'timestamp': '2025-09-30 22:43:13.978298', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.060391', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.146609827876091, 'timestamp': '2025-09-30 22:43:14.066905', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:14.137123', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.05794006586074829, 'timestamp': '2025-09-30 22:43:14.145839', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:14.213398', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.1562223583459854, 'timestamp': '2025-09-30 22:43:14.222074', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:14.300718', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.08184656500816345, 'timestamp': '2025-09-30 22:43:14.303529', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:14.369747', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.08903709053993225, 'timestamp': '2025-09-30 22:43:14.373007', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.438881', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.08300064504146576, 'timestamp': '2025-09-30 22:43:14.447289', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:14.518721', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.14897257089614868, 'timestamp': '2025-09-30 22:43:14.521540', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.589550', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.11639546602964401, 'timestamp': '2025-09-30 22:43:14.592453', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.661516', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.12445994466543198, 'timestamp': '2025-09-30 22:43:14.664025', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.729039', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.15762768685817719, 'timestamp': '2025-09-30 22:43:14.740552', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:14.808760', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.08974011242389679, 'timestamp': '2025-09-30 22:43:14.811604', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.883323', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.06211666017770767, 'timestamp': '2025-09-30 22:43:14.891050', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:14.962674', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.06874440610408783, 'timestamp': '2025-09-30 22:43:14.966377', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:15.047018', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.09531993418931961, 'timestamp': '2025-09-30 22:43:15.056072', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:15.132859', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.061153605580329895, 'timestamp': '2025-09-30 22:43:15.136884', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:15.208285', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.09137706458568573, 'timestamp': '2025-09-30 22:43:15.213127', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:15.282519', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.11259016394615173, 'timestamp': '2025-09-30 22:43:15.287009', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:15.354133', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.06429437547922134, 'timestamp': '2025-09-30 22:43:15.362812', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:15.427366', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.09525961428880692, 'timestamp': '2025-09-30 22:43:15.430086', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:15.517014', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.09461715072393417, 'timestamp': '2025-09-30 22:43:15.520232', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:15.586578', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.18369881808757782, 'timestamp': '2025-09-30 22:43:15.590387', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:15.670963', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.07562755793333054, 'timestamp': '2025-09-30 22:43:15.680203', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:15.749008', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.05226879194378853, 'timestamp': '2025-09-30 22:43:15.751992', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:15.829773', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.05269203335046768, 'timestamp': '2025-09-30 22:43:15.834319', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:15.908310', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.05028980225324631, 'timestamp': '2025-09-30 22:43:15.911254', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:15.984820', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.09943829476833344, 'timestamp': '2025-09-30 22:43:15.995191', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.063087', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.10043498128652573, 'timestamp': '2025-09-30 22:43:16.066342', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.132711', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.06465955823659897, 'timestamp': '2025-09-30 22:43:16.138170', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.204123', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.11894842237234116, 'timestamp': '2025-09-30 22:43:16.207758', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.289110', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.12267854064702988, 'timestamp': '2025-09-30 22:43:16.297888', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.382342', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.04040291905403137, 'timestamp': '2025-09-30 22:43:16.393217', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:16.469956', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.10285364836454391, 'timestamp': '2025-09-30 22:43:16.479319', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:16.550675', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.13013753294944763, 'timestamp': '2025-09-30 22:43:16.555139', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:43:16.624839', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.09091455489397049, 'timestamp': '2025-09-30 22:43:16.640049', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:16.712867', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.03629031777381897, 'timestamp': '2025-09-30 22:43:16.717269', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:16.783394', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.07758276909589767, 'timestamp': '2025-09-30 22:43:16.787712', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:16.855567', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.13190357387065887, 'timestamp': '2025-09-30 22:43:16.858726', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:16.926074', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.09022941440343857, 'timestamp': '2025-09-30 22:43:16.937277', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.015809', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.17432525753974915, 'timestamp': '2025-09-30 22:43:17.019333', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:17.088413', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.041565295308828354, 'timestamp': '2025-09-30 22:43:17.098337', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:17.176854', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.11325212568044662, 'timestamp': '2025-09-30 22:43:17.186604', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:17.254290', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.09792929142713547, 'timestamp': '2025-09-30 22:43:17.268493', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.347865', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.06476263701915741, 'timestamp': '2025-09-30 22:43:17.352682', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.422152', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.04877709224820137, 'timestamp': '2025-09-30 22:43:17.425008', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:17.498623', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.05221262201666832, 'timestamp': '2025-09-30 22:43:17.504524', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:17.574138', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.10225986689329147, 'timestamp': '2025-09-30 22:43:17.590294', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.656817', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.10899157077074051, 'timestamp': '2025-09-30 22:43:17.659355', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.731452', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.028997616842389107, 'timestamp': '2025-09-30 22:43:17.734645', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.802166', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.06920935213565826, 'timestamp': '2025-09-30 22:43:17.810011', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:17.901366', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.1218041405081749, 'timestamp': '2025-09-30 22:43:17.918478', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:17.995675', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.028149550780653954, 'timestamp': '2025-09-30 22:43:18.000985', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:18.074520', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.1015089675784111, 'timestamp': '2025-09-30 22:43:18.081214', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:18.147411', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.10930020362138748, 'timestamp': '2025-09-30 22:43:18.150665', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:18.225500', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.06416401267051697, 'timestamp': '2025-09-30 22:43:18.235320', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:18.312704', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.08547493070363998, 'timestamp': '2025-09-30 22:43:18.317408', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:18.393994', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.06211840733885765, 'timestamp': '2025-09-30 22:43:18.397695', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:18.484599', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.07514004409313202, 'timestamp': '2025-09-30 22:43:18.488570', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:18.556168', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.043925702571868896, 'timestamp': '2025-09-30 22:43:18.571885', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:18.638834', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.09779129922389984, 'timestamp': '2025-09-30 22:43:18.644347', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:18.713858', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.06936614960432053, 'timestamp': '2025-09-30 22:43:18.717839', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:18.792632', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.07925459742546082, 'timestamp': '2025-09-30 22:43:18.796936', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:18.868891', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.07174336910247803, 'timestamp': '2025-09-30 22:43:18.877263', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:18.958409', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.17675115168094635, 'timestamp': '2025-09-30 22:43:18.965130', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:19.033888', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.08644280582666397, 'timestamp': '2025-09-30 22:43:19.038305', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.108072', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.17084822058677673, 'timestamp': '2025-09-30 22:43:19.122035', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:19.190075', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.0759417936205864, 'timestamp': '2025-09-30 22:43:19.199769', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:19.263367', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.139486163854599, 'timestamp': '2025-09-30 22:43:19.266992', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:19.346502', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.047506436705589294, 'timestamp': '2025-09-30 22:43:19.352070', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.421962', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.14242567121982574, 'timestamp': '2025-09-30 22:43:19.425046', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:19.492797', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.07719582319259644, 'timestamp': '2025-09-30 22:43:19.504296', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.579210', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.10315454006195068, 'timestamp': '2025-09-30 22:43:19.582106', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.662689', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.07916948199272156, 'timestamp': '2025-09-30 22:43:19.665254', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.738533', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.15940940380096436, 'timestamp': '2025-09-30 22:43:19.741718', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:19.813731', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.11868676543235779, 'timestamp': '2025-09-30 22:43:19.822903', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:19.899872', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.09096790850162506, 'timestamp': '2025-09-30 22:43:19.904480', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:19.978594', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.12792475521564484, 'timestamp': '2025-09-30 22:43:19.983081', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:20.061941', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.07554040849208832, 'timestamp': '2025-09-30 22:43:20.065315', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:20.143073', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.09930939227342606, 'timestamp': '2025-09-30 22:43:20.151658', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:20.234585', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.119075246155262, 'timestamp': '2025-09-30 22:43:20.245850', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:20.321455', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.15009377896785736, 'timestamp': '2025-09-30 22:43:20.325004', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:20.392540', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.12103459239006042, 'timestamp': '2025-09-30 22:43:20.405570', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:20.500115', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.07460750639438629, 'timestamp': '2025-09-30 22:43:20.518305', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:20.633068', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.0720214918255806, 'timestamp': '2025-09-30 22:43:20.641379', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:20.723602', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.08496839553117752, 'timestamp': '2025-09-30 22:43:20.728462', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:20.811885', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.09423999488353729, 'timestamp': '2025-09-30 22:43:20.823967', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:20.913723', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.1043877974152565, 'timestamp': '2025-09-30 22:43:20.926323', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:21.007596', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.08125147968530655, 'timestamp': '2025-09-30 22:43:21.013211', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:21.095335', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.1251751333475113, 'timestamp': '2025-09-30 22:43:21.099410', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:21.195603', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.09891557693481445, 'timestamp': '2025-09-30 22:43:21.206927', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:21.307331', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.06322578340768814, 'timestamp': '2025-09-30 22:43:21.316247', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:21.412411', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.04258335381746292, 'timestamp': '2025-09-30 22:43:21.426146', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:21.516625', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.09029140323400497, 'timestamp': '2025-09-30 22:43:21.528790', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:21.619047', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.12525935471057892, 'timestamp': '2025-09-30 22:43:21.624581', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:21.694831', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.041736386716365814, 'timestamp': '2025-09-30 22:43:21.704438', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:21.792175', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.10023356229066849, 'timestamp': '2025-09-30 22:43:21.803365', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:21.910356', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.09130299091339111, 'timestamp': '2025-09-30 22:43:21.913783', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:22.008423', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.08363451063632965, 'timestamp': '2025-09-30 22:43:22.011974', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:22.087957', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.0925561934709549, 'timestamp': '2025-09-30 22:43:22.098506', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:22.173764', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.07882639020681381, 'timestamp': '2025-09-30 22:43:22.179778', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:22.247834', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.1842900812625885, 'timestamp': '2025-09-30 22:43:22.251354', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:22.338190', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.07401345670223236, 'timestamp': '2025-09-30 22:43:22.344815', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:22.428168', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.1138453334569931, 'timestamp': '2025-09-30 22:43:22.438090', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:22.514482', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.0574965700507164, 'timestamp': '2025-09-30 22:43:22.529031', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:22.600578', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.16960258781909943, 'timestamp': '2025-09-30 22:43:22.612551', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:22.691507', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.037706874310970306, 'timestamp': '2025-09-30 22:43:22.695985', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:22.767583', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.08099346607923508, 'timestamp': '2025-09-30 22:43:22.777318', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:22.845439', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.10674881935119629, 'timestamp': '2025-09-30 22:43:22.850263', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:43:22.919766', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.058911196887493134, 'timestamp': '2025-09-30 22:43:22.935274', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.011528', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.16071772575378418, 'timestamp': '2025-09-30 22:43:23.023703', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.100038', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.05725274980068207, 'timestamp': '2025-09-30 22:43:23.119579', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.200784', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.1536208540201187, 'timestamp': '2025-09-30 22:43:23.204970', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.274121', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.07045900821685791, 'timestamp': '2025-09-30 22:43:23.278253', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.349907', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.07482100278139114, 'timestamp': '2025-09-30 22:43:23.362417', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:23.459974', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.10687021911144257, 'timestamp': '2025-09-30 22:43:23.468643', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:23.538670', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.10161162912845612, 'timestamp': '2025-09-30 22:43:23.541568', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:23.611097', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.13431628048419952, 'timestamp': '2025-09-30 22:43:23.623578', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:23.693729', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.04069695249199867, 'timestamp': '2025-09-30 22:43:23.697799', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:23.779746', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.04830440133810043, 'timestamp': '2025-09-30 22:43:23.798754', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:23.866114', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.08679232001304626, 'timestamp': '2025-09-30 22:43:23.870294', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:23.937348', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.0948488637804985, 'timestamp': '2025-09-30 22:43:23.940890', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.010159', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.10205661505460739, 'timestamp': '2025-09-30 22:43:24.014570', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:24.083687', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.06891795247793198, 'timestamp': '2025-09-30 22:43:24.094544', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.189743', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.10251394659280777, 'timestamp': '2025-09-30 22:43:24.192831', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:24.260726', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.06668665260076523, 'timestamp': '2025-09-30 22:43:24.265311', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:24.346697', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.04634995386004448, 'timestamp': '2025-09-30 22:43:24.350430', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:24.419549', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.05225945636630058, 'timestamp': '2025-09-30 22:43:24.428087', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.494444', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.04896195977926254, 'timestamp': '2025-09-30 22:43:24.498996', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.577304', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.0726386159658432, 'timestamp': '2025-09-30 22:43:24.586547', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.653289', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.1083584576845169, 'timestamp': '2025-09-30 22:43:24.663689', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:24.739378', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.10128773003816605, 'timestamp': '2025-09-30 22:43:24.748328', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:24.816517', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.09508465230464935, 'timestamp': '2025-09-30 22:43:24.820990', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:24.888271', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.05691715329885483, 'timestamp': '2025-09-30 22:43:24.894075', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:24.960134', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.12229017168283463, 'timestamp': '2025-09-30 22:43:24.964634', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:25.034745', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.02284933254122734, 'timestamp': '2025-09-30 22:43:25.044119', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:25.126579', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.096613310277462, 'timestamp': '2025-09-30 22:43:25.138067', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:25.211950', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.11076455563306808, 'timestamp': '2025-09-30 22:43:25.216325', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:25.287275', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.026962928473949432, 'timestamp': '2025-09-30 22:43:25.290563', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:25.367903', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.1072981208562851, 'timestamp': '2025-09-30 22:43:25.376237', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:25.443240', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.10959865897893906, 'timestamp': '2025-09-30 22:43:25.448083', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:25.525322', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.027208378538489342, 'timestamp': '2025-09-30 22:43:25.530073', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:25.595577', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.08432915061712265, 'timestamp': '2025-09-30 22:43:25.600215', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:25.680811', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.053786084055900574, 'timestamp': '2025-09-30 22:43:25.691636', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:25.767187', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.08932678401470184, 'timestamp': '2025-09-30 22:43:25.772793', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:25.839443', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.07093577086925507, 'timestamp': '2025-09-30 22:43:25.843882', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:25.914197', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.0297837033867836, 'timestamp': '2025-09-30 22:43:25.919953', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:26.011536', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.0598529577255249, 'timestamp': '2025-09-30 22:43:26.022340', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-09-30 22:43:26.602556', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:26.669577', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.047215092927217484, 'timestamp': '2025-09-30 22:43:26.673378', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:26.768198', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.10090375691652298, 'timestamp': '2025-09-30 22:43:26.781040', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:26.847867', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.1427672952413559, 'timestamp': '2025-09-30 22:43:26.852117', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:26.918928', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.030794713646173477, 'timestamp': '2025-09-30 22:43:26.927027', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:27.006038', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.09296029806137085, 'timestamp': '2025-09-30 22:43:27.011690', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:27.080936', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.062209244817495346, 'timestamp': '2025-09-30 22:43:27.084664', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:27.162030', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.062047261744737625, 'timestamp': '2025-09-30 22:43:27.166979', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:27.235554', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.12247642874717712, 'timestamp': '2025-09-30 22:43:27.243851', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:27.325691', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.08281739801168442, 'timestamp': '2025-09-30 22:43:27.330986', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:27.398790', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.0740426629781723, 'timestamp': '2025-09-30 22:43:27.402766', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:27.476740', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.04094911739230156, 'timestamp': '2025-09-30 22:43:27.480091', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:43:27.550603', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.06221301853656769, 'timestamp': '2025-09-30 22:43:27.559040', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:27.640869', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.05930543690919876, 'timestamp': '2025-09-30 22:43:27.645353', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:27.721528', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.16316917538642883, 'timestamp': '2025-09-30 22:43:27.737567', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:27.818761', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.09129855036735535, 'timestamp': '2025-09-30 22:43:27.823701', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:27.892404', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.07946841418743134, 'timestamp': '2025-09-30 22:43:27.903096', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:27.978292', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.09288828074932098, 'timestamp': '2025-09-30 22:43:27.982705', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:28.055515', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.07949453592300415, 'timestamp': '2025-09-30 22:43:28.061108', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:28.127086', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.10314321517944336, 'timestamp': '2025-09-30 22:43:28.133435', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:28.219688', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.1006038561463356, 'timestamp': '2025-09-30 22:43:28.228865', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:28.298172', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.11910790205001831, 'timestamp': '2025-09-30 22:43:28.301069', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:28.374438', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.0485336035490036, 'timestamp': '2025-09-30 22:43:28.377406', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:28.442642', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.12959282100200653, 'timestamp': '2025-09-30 22:43:28.451545', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:28.527330', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.06778742372989655, 'timestamp': '2025-09-30 22:43:28.537580', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:28.609224', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.08891819417476654, 'timestamp': '2025-09-30 22:43:28.614891', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:28.684954', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.08092670887708664, 'timestamp': '2025-09-30 22:43:28.688149', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:28.761042', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.07136057317256927, 'timestamp': '2025-09-30 22:43:28.766569', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:28.845675', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.03827590122818947, 'timestamp': '2025-09-30 22:43:28.853834', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:28.921663', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.06634363532066345, 'timestamp': '2025-09-30 22:43:28.928486', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:28.999018', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.08778984844684601, 'timestamp': '2025-09-30 22:43:29.001901', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.077743', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.11363393068313599, 'timestamp': '2025-09-30 22:43:29.081078', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.145905', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.10856977105140686, 'timestamp': '2025-09-30 22:43:29.153843', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.219696', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.18818135559558868, 'timestamp': '2025-09-30 22:43:29.222550', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.295483', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.062485408037900925, 'timestamp': '2025-09-30 22:43:29.304081', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:29.374234', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.05890416353940964, 'timestamp': '2025-09-30 22:43:29.378774', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:29.446442', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.0548039935529232, 'timestamp': '2025-09-30 22:43:29.458354', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:29.556576', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.04371468722820282, 'timestamp': '2025-09-30 22:43:29.561628', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:29.638126', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.04329593479633331, 'timestamp': '2025-09-30 22:43:29.642824', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:29.709678', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.06830675899982452, 'timestamp': '2025-09-30 22:43:29.712594', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.785147', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.21357905864715576, 'timestamp': '2025-09-30 22:43:29.793584', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:29.868441', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.07275765389204025, 'timestamp': '2025-09-30 22:43:29.871602', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:29.937855', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.08954112231731415, 'timestamp': '2025-09-30 22:43:29.941242', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:30.009355', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.0378229096531868, 'timestamp': '2025-09-30 22:43:30.013172', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:30.101320', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.042231474071741104, 'timestamp': '2025-09-30 22:43:30.110490', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:30.199431', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.2366189956665039, 'timestamp': '2025-09-30 22:43:30.202042', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:30.279133', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.12397109717130661, 'timestamp': '2025-09-30 22:43:30.288306', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:30.355318', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.062040966004133224, 'timestamp': '2025-09-30 22:43:30.360252', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:30.427442', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.02443678304553032, 'timestamp': '2025-09-30 22:43:30.445725', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:30.516886', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.13243959844112396, 'timestamp': '2025-09-30 22:43:30.526632', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:30.595960', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.08326924592256546, 'timestamp': '2025-09-30 22:43:30.606300', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:30.685432', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.054676782339811325, 'timestamp': '2025-09-30 22:43:30.688676', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:30.782392', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.09942890703678131, 'timestamp': '2025-09-30 22:43:30.790823', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:30.871724', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.09356889873743057, 'timestamp': '2025-09-30 22:43:30.874999', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:30.943242', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.09071757644414902, 'timestamp': '2025-09-30 22:43:30.947816', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:31.019047', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.09565350413322449, 'timestamp': '2025-09-30 22:43:31.025386', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:31.093074', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.05202684924006462, 'timestamp': '2025-09-30 22:43:31.113547', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:31.187637', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.1419471949338913, 'timestamp': '2025-09-30 22:43:31.194261', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:31.273430', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.11725125461816788, 'timestamp': '2025-09-30 22:43:31.277150', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:31.348609', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.197920560836792, 'timestamp': '2025-09-30 22:43:31.357278', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:31.428959', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.16114512085914612, 'timestamp': '2025-09-30 22:43:31.437803', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:31.504438', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.06854604184627533, 'timestamp': '2025-09-30 22:43:31.508347', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:31.585445', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.07231024652719498, 'timestamp': '2025-09-30 22:43:31.589291', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:31.672217', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.022822216153144836, 'timestamp': '2025-09-30 22:43:31.683308', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:31.759916', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.14603537321090698, 'timestamp': '2025-09-30 22:43:31.776303', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:31.856654', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.07684742659330368, 'timestamp': '2025-09-30 22:43:31.868141', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:31.946191', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.06336019188165665, 'timestamp': '2025-09-30 22:43:31.949205', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:32.015673', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.13044458627700806, 'timestamp': '2025-09-30 22:43:32.019289', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:32.095359', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.06859228760004044, 'timestamp': '2025-09-30 22:43:32.115671', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:32.189176', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.059587158262729645, 'timestamp': '2025-09-30 22:43:32.193643', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:32.270720', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.09609636664390564, 'timestamp': '2025-09-30 22:43:32.276960', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:32.358543', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.1504010409116745, 'timestamp': '2025-09-30 22:43:32.363088', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:32.430726', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.039474133402109146, 'timestamp': '2025-09-30 22:43:32.440205', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:43:50.228183', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 9736.16956519312, 'timestamp': '2025-09-30 22:43:50.230992', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:50.297070', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.09417658299207687, 'timestamp': '2025-09-30 22:43:50.299293', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:50.381133', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.1778106391429901, 'timestamp': '2025-09-30 22:43:50.382961', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:50.474516', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.12126670777797699, 'timestamp': '2025-09-30 22:43:50.479723', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:50.545265', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.05888501554727554, 'timestamp': '2025-09-30 22:43:50.552366', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:50.652553', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.07299569249153137, 'timestamp': '2025-09-30 22:43:50.654514', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:50.736393', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.11380966007709503, 'timestamp': '2025-09-30 22:43:50.738398', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:50.800633', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.05533929169178009, 'timestamp': '2025-09-30 22:43:50.809424', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:50.873990', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.1352389007806778, 'timestamp': '2025-09-30 22:43:50.880947', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:50.949293', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.046426739543676376, 'timestamp': '2025-09-30 22:43:50.951485', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.017721', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.06815165281295776, 'timestamp': '2025-09-30 22:43:51.020058', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:51.090776', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.11911474913358688, 'timestamp': '2025-09-30 22:43:51.092776', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.157175', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.08246156573295593, 'timestamp': '2025-09-30 22:43:51.164463', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.247858', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.05109471082687378, 'timestamp': '2025-09-30 22:43:51.250784', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.309623', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.10969051718711853, 'timestamp': '2025-09-30 22:43:51.312315', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:51.372527', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.045078158378601074, 'timestamp': '2025-09-30 22:43:51.375581', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.446568', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.15123113989830017, 'timestamp': '2025-09-30 22:43:51.453208', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:51.511421', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.07352226972579956, 'timestamp': '2025-09-30 22:43:51.515076', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:51.572676', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.13076628744602203, 'timestamp': '2025-09-30 22:43:51.574883', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.638349', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.14772556722164154, 'timestamp': '2025-09-30 22:43:51.640632', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.713958', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.1653672307729721, 'timestamp': '2025-09-30 22:43:51.720586', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:51.777741', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.11366474628448486, 'timestamp': '2025-09-30 22:43:51.780228', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:51.840188', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.05643017962574959, 'timestamp': '2025-09-30 22:43:51.844511', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:51.904808', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.16135728359222412, 'timestamp': '2025-09-30 22:43:51.907901', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:51.980943', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.08074624091386795, 'timestamp': '2025-09-30 22:43:51.987901', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:52.063479', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.10788651555776596, 'timestamp': '2025-09-30 22:43:52.067868', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.127157', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.10472461581230164, 'timestamp': '2025-09-30 22:43:52.130205', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.205941', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.16391701996326447, 'timestamp': '2025-09-30 22:43:52.208246', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:52.266272', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.06874833256006241, 'timestamp': '2025-09-30 22:43:52.274551', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:52.330942', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.08240167051553726, 'timestamp': '2025-09-30 22:43:52.334955', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.392688', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.11908935010433197, 'timestamp': '2025-09-30 22:43:52.395416', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:52.471762', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.031783200800418854, 'timestamp': '2025-09-30 22:43:52.476078', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:52.536999', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.12713336944580078, 'timestamp': '2025-09-30 22:43:52.545425', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:52.604749', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.06737358868122101, 'timestamp': '2025-09-30 22:43:52.607100', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.664125', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.11140821129083633, 'timestamp': '2025-09-30 22:43:52.667216', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:52.725862', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.09929397702217102, 'timestamp': '2025-09-30 22:43:52.729098', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.805381', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.0791638195514679, 'timestamp': '2025-09-30 22:43:52.813170', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:52.878625', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.03856263309717178, 'timestamp': '2025-09-30 22:43:52.881092', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:52.938199', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.05492845177650452, 'timestamp': '2025-09-30 22:43:52.940469', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:52.999401', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.11049076169729233, 'timestamp': '2025-09-30 22:43:53.001901', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:53.064550', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.06728321313858032, 'timestamp': '2025-09-30 22:43:53.071479', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:53.128548', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.06608769297599792, 'timestamp': '2025-09-30 22:43:53.130935', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:53.189436', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.11751492321491241, 'timestamp': '2025-09-30 22:43:53.191506', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:53.257928', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.06622110307216644, 'timestamp': '2025-09-30 22:43:53.260125', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:53.319099', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.0906800776720047, 'timestamp': '2025-09-30 22:43:53.325169', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:53.385854', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.05918389931321144, 'timestamp': '2025-09-30 22:43:53.387862', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:53.446661', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.1654713749885559, 'timestamp': '2025-09-30 22:43:53.449640', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:53.512615', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.09044285118579865, 'timestamp': '2025-09-30 22:43:53.514985', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:53.600146', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.20198500156402588, 'timestamp': '2025-09-30 22:43:53.607066', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:53.691007', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.09865271300077438, 'timestamp': '2025-09-30 22:43:53.695941', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:53.761762', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.05393974483013153, 'timestamp': '2025-09-30 22:43:53.764166', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:53.837805', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.06537532061338425, 'timestamp': '2025-09-30 22:43:53.840599', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:53.900305', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.12725088000297546, 'timestamp': '2025-09-30 22:43:53.906355', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:53.982786', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.08113911747932434, 'timestamp': '2025-09-30 22:43:53.985084', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:54.062847', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.07356328517198563, 'timestamp': '2025-09-30 22:43:54.065237', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:54.149898', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.03873545303940773, 'timestamp': '2025-09-30 22:43:54.152135', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:54.274648', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.07577034085988998, 'timestamp': '2025-09-30 22:43:54.281846', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:43:54.356303', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.03598861023783684, 'timestamp': '2025-09-30 22:43:54.360075', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:54.453433', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.06940563768148422, 'timestamp': '2025-09-30 22:43:54.455709', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:54.556676', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.11125241965055466, 'timestamp': '2025-09-30 22:43:54.559068', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:54.662805', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.10178142040967941, 'timestamp': '2025-09-30 22:43:54.669164', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:54.761560', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.1453048586845398, 'timestamp': '2025-09-30 22:43:54.763873', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:54.830139', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.27345526218414307, 'timestamp': '2025-09-30 22:43:54.833933', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:54.896238', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.14960268139839172, 'timestamp': '2025-09-30 22:43:54.899203', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:54.968470', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.1550963670015335, 'timestamp': '2025-09-30 22:43:54.978510', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.056567', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.11931634694337845, 'timestamp': '2025-09-30 22:43:55.059027', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.133572', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.05645795539021492, 'timestamp': '2025-09-30 22:43:55.136321', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:55.194466', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.09822133183479309, 'timestamp': '2025-09-30 22:43:55.196633', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.254191', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.0367857851088047, 'timestamp': '2025-09-30 22:43:55.260398', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.317403', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.16254295408725739, 'timestamp': '2025-09-30 22:43:55.320000', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:55.377800', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.08419929444789886, 'timestamp': '2025-09-30 22:43:55.380196', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.441331', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.03403563052415848, 'timestamp': '2025-09-30 22:43:55.443967', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.500379', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.03122418373823166, 'timestamp': '2025-09-30 22:43:55.506464', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.563953', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.062183499336242676, 'timestamp': '2025-09-30 22:43:55.566621', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.646032', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.24092404544353485, 'timestamp': '2025-09-30 22:43:55.648697', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:55.712622', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.1462680697441101, 'timestamp': '2025-09-30 22:43:55.715364', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:55.784830', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.17077258229255676, 'timestamp': '2025-09-30 22:43:55.792535', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.850608', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.0477396659553051, 'timestamp': '2025-09-30 22:43:55.852942', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.912121', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.03662000969052315, 'timestamp': '2025-09-30 22:43:55.916623', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:55.973879', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.1162535697221756, 'timestamp': '2025-09-30 22:43:55.977733', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:56.052506', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.05302426964044571, 'timestamp': '2025-09-30 22:43:56.059249', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:56.115151', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.12741558253765106, 'timestamp': '2025-09-30 22:43:56.117557', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.175021', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.07939058542251587, 'timestamp': '2025-09-30 22:43:56.177430', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:56.237341', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.18191251158714294, 'timestamp': '2025-09-30 22:43:56.239557', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.309411', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.09735814481973648, 'timestamp': '2025-09-30 22:43:56.316223', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.382508', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.044860225170850754, 'timestamp': '2025-09-30 22:43:56.384244', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:56.461552', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.09751775115728378, 'timestamp': '2025-09-30 22:43:56.465183', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.534574', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.056492168456315994, 'timestamp': '2025-09-30 22:43:56.537958', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:56.599298', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.09185358881950378, 'timestamp': '2025-09-30 22:43:56.605252', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:56.674498', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.15325550734996796, 'timestamp': '2025-09-30 22:43:56.679723', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.736758', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.04793432727456093, 'timestamp': '2025-09-30 22:43:56.740836', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.808269', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.07080008834600449, 'timestamp': '2025-09-30 22:43:56.810969', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:56.870477', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.01672479882836342, 'timestamp': '2025-09-30 22:43:56.876526', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:56.933252', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.05233015492558479, 'timestamp': '2025-09-30 22:43:56.935875', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:56.996067', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.15045908093452454, 'timestamp': '2025-09-30 22:43:56.998607', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:57.056620', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.05677605792880058, 'timestamp': '2025-09-30 22:43:57.059547', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.118673', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.058470405638217926, 'timestamp': '2025-09-30 22:43:57.124936', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:57.195689', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.11616528034210205, 'timestamp': '2025-09-30 22:43:57.198321', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:57.277553', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.054483331739902496, 'timestamp': '2025-09-30 22:43:57.281633', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:57.339128', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.14964251220226288, 'timestamp': '2025-09-30 22:43:57.348129', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:57.409527', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.11210275441408157, 'timestamp': '2025-09-30 22:43:57.415708', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.477875', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.16018158197402954, 'timestamp': '2025-09-30 22:43:57.480250', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.537185', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.14280225336551666, 'timestamp': '2025-09-30 22:43:57.539548', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.605977', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.15254399180412292, 'timestamp': '2025-09-30 22:43:57.608634', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.680574', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.13128793239593506, 'timestamp': '2025-09-30 22:43:57.690804', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.749524', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.10291833430528641, 'timestamp': '2025-09-30 22:43:57.751865', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:57.810137', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.07863886654376984, 'timestamp': '2025-09-30 22:43:57.812753', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:57.871640', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.09561081230640411, 'timestamp': '2025-09-30 22:43:57.874257', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:57.933724', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.03668558597564697, 'timestamp': '2025-09-30 22:43:57.940382', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:57.997157', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.12815825641155243, 'timestamp': '2025-09-30 22:43:57.999541', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.066985', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.02045387215912342, 'timestamp': '2025-09-30 22:43:58.069190', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.127857', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.027304690331220627, 'timestamp': '2025-09-30 22:43:58.129847', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:58.202087', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.07365117967128754, 'timestamp': '2025-09-30 22:43:58.207965', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.263943', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.07627130299806595, 'timestamp': '2025-09-30 22:43:58.266405', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:58.322705', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.16508136689662933, 'timestamp': '2025-09-30 22:43:58.325126', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.382080', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.052577611058950424, 'timestamp': '2025-09-30 22:43:58.385556', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:58.455082', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.11489887535572052, 'timestamp': '2025-09-30 22:43:58.463401', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:58.521309', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.07243222743272781, 'timestamp': '2025-09-30 22:43:58.525051', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:58.581958', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.12322337925434113, 'timestamp': '2025-09-30 22:43:58.584278', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.643899', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.04038462042808533, 'timestamp': '2025-09-30 22:43:58.646359', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.706192', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.04200126603245735, 'timestamp': '2025-09-30 22:43:58.712169', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.773296', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.1395256370306015, 'timestamp': '2025-09-30 22:43:58.775965', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:58.833992', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.10103163868188858, 'timestamp': '2025-09-30 22:43:58.836357', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:58.893061', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.13825306296348572, 'timestamp': '2025-09-30 22:43:58.895643', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:58.954853', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.050968822091817856, 'timestamp': '2025-09-30 22:43:58.960809', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:59.022549', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.06733620166778564, 'timestamp': '2025-09-30 22:43:59.024881', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:59.082959', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.07008688151836395, 'timestamp': '2025-09-30 22:43:59.085208', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.143639', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.08186334371566772, 'timestamp': '2025-09-30 22:43:59.145974', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:59.206232', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.055432602763175964, 'timestamp': '2025-09-30 22:43:59.212290', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:43:59.280807', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.13987845182418823, 'timestamp': '2025-09-30 22:43:59.284413', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:59.341006', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.16914698481559753, 'timestamp': '2025-09-30 22:43:59.343536', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:59.404211', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.06128271296620369, 'timestamp': '2025-09-30 22:43:59.406497', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.463255', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.19131824374198914, 'timestamp': '2025-09-30 22:43:59.469652', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:43:59.528374', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.04993048682808876, 'timestamp': '2025-09-30 22:43:59.530750', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.589825', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.056003253906965256, 'timestamp': '2025-09-30 22:43:59.592847', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.657290', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.027087152004241943, 'timestamp': '2025-09-30 22:43:59.659832', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.718581', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.032197244465351105, 'timestamp': '2025-09-30 22:43:59.725538', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:43:59.783217', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.07772716879844666, 'timestamp': '2025-09-30 22:43:59.785812', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.845079', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.09443821012973785, 'timestamp': '2025-09-30 22:43:59.847506', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:43:59.905705', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.05238291621208191, 'timestamp': '2025-09-30 22:43:59.908199', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:43:59.964204', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.1617303043603897, 'timestamp': '2025-09-30 22:43:59.970441', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:00.036358', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.18013301491737366, 'timestamp': '2025-09-30 22:44:00.038915', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.109744', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.0845901146531105, 'timestamp': '2025-09-30 22:44:00.111868', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:00.180459', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.07930770516395569, 'timestamp': '2025-09-30 22:44:00.182619', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.240720', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.06389615684747696, 'timestamp': '2025-09-30 22:44:00.246828', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:00.310441', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.11621981859207153, 'timestamp': '2025-09-30 22:44:00.313037', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.379770', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.10196910798549652, 'timestamp': '2025-09-30 22:44:00.383123', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:00.441672', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.06487315893173218, 'timestamp': '2025-09-30 22:44:00.444638', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:00.502629', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.05915650725364685, 'timestamp': '2025-09-30 22:44:00.508522', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:00.577563', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.03446308150887489, 'timestamp': '2025-09-30 22:44:00.579651', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.638657', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.10307422280311584, 'timestamp': '2025-09-30 22:44:00.641016', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.699889', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.12258266657590866, 'timestamp': '2025-09-30 22:44:00.702306', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:00.760240', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.13386039435863495, 'timestamp': '2025-09-30 22:44:00.767557', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:00.825955', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.19607029855251312, 'timestamp': '2025-09-30 22:44:00.829958', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:00.892163', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.10524329543113708, 'timestamp': '2025-09-30 22:44:00.894588', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:00.953979', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.043480921536684036, 'timestamp': '2025-09-30 22:44:00.959910', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:01.017899', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.08477984368801117, 'timestamp': '2025-09-30 22:44:01.024493', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:01.089050', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.06363649666309357, 'timestamp': '2025-09-30 22:44:01.091278', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:01.150007', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.11554902791976929, 'timestamp': '2025-09-30 22:44:01.152262', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.220062', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.06188930198550224, 'timestamp': '2025-09-30 22:44:01.231505', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:01.303222', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.11085178703069687, 'timestamp': '2025-09-30 22:44:01.308940', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:01.366184', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.1372198462486267, 'timestamp': '2025-09-30 22:44:01.369079', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.438788', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.08907867968082428, 'timestamp': '2025-09-30 22:44:01.442326', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.501159', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.0619630292057991, 'timestamp': '2025-09-30 22:44:01.507016', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:01.572734', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.07562649250030518, 'timestamp': '2025-09-30 22:44:01.579164', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:01.637379', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.04859752580523491, 'timestamp': '2025-09-30 22:44:01.639640', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.696810', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.07157289981842041, 'timestamp': '2025-09-30 22:44:01.701420', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.761607', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.03990424796938896, 'timestamp': '2025-09-30 22:44:01.766995', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:01.826499', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.059926629066467285, 'timestamp': '2025-09-30 22:44:01.833249', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:01.894440', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.024530785158276558, 'timestamp': '2025-09-30 22:44:01.896781', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:01.956672', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.13222746551036835, 'timestamp': '2025-09-30 22:44:01.959222', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:02.023365', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.0032208303455263376, 'timestamp': '2025-09-30 22:44:02.026756', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:02.093138', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.04078640416264534, 'timestamp': '2025-09-30 22:44:02.099124', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:02.159808', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.04702415317296982, 'timestamp': '2025-09-30 22:44:02.163254', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:02.238955', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.08849745988845825, 'timestamp': '2025-09-30 22:44:02.241587', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:02.301903', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.09458629041910172, 'timestamp': '2025-09-30 22:44:02.308275', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:02.367283', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.01561368815600872, 'timestamp': '2025-09-30 22:44:02.378109', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:02.448808', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.04587879776954651, 'timestamp': '2025-09-30 22:44:02.453926', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:02.524473', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.0924539640545845, 'timestamp': '2025-09-30 22:44:02.526633', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:02.596433', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.08546476811170578, 'timestamp': '2025-09-30 22:44:02.598592', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:02.660451', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.0829431489109993, 'timestamp': '2025-09-30 22:44:02.666496', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:02.723898', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.023352228105068207, 'timestamp': '2025-09-30 22:44:02.726161', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:02.782867', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.08677460998296738, 'timestamp': '2025-09-30 22:44:02.785111', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:02.841965', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.02655380219221115, 'timestamp': '2025-09-30 22:44:02.844233', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:02.901748', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.11372705549001694, 'timestamp': '2025-09-30 22:44:02.910203', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:02.969281', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.01903020776808262, 'timestamp': '2025-09-30 22:44:02.975321', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:03.032082', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.06371214985847473, 'timestamp': '2025-09-30 22:44:03.035372', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:03.092607', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.1912257820367813, 'timestamp': '2025-09-30 22:44:03.094882', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:03.163072', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.04787641763687134, 'timestamp': '2025-09-30 22:44:03.169104', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:03.225384', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.10372982174158096, 'timestamp': '2025-09-30 22:44:03.227637', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:03.301305', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.06311934441328049, 'timestamp': '2025-09-30 22:44:03.303914', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:03.361751', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.1216520145535469, 'timestamp': '2025-09-30 22:44:03.367638', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:03.425621', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.05988646671175957, 'timestamp': '2025-09-30 22:44:03.431691', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:03.500171', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.08764403313398361, 'timestamp': '2025-09-30 22:44:03.502942', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:03.559968', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.04389079660177231, 'timestamp': '2025-09-30 22:44:03.562443', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:03.621410', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.12826433777809143, 'timestamp': '2025-09-30 22:44:03.624052', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:03.690465', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.10533038526773453, 'timestamp': '2025-09-30 22:44:03.696646', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:03.753353', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.10396457463502884, 'timestamp': '2025-09-30 22:44:03.756531', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:03.814391', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.10378243774175644, 'timestamp': '2025-09-30 22:44:03.820932', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:03.885469', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.10932177305221558, 'timestamp': '2025-09-30 22:44:03.888188', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:03.948011', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.08672484755516052, 'timestamp': '2025-09-30 22:44:03.953889', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:04.020670', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.0332503616809845, 'timestamp': '2025-09-30 22:44:04.022948', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:04.093595', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.06995880603790283, 'timestamp': '2025-09-30 22:44:04.096997', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:04.154793', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.06364498287439346, 'timestamp': '2025-09-30 22:44:04.157031', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:04.213891', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.11119689047336578, 'timestamp': '2025-09-30 22:44:04.219896', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:04.276398', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.01903095655143261, 'timestamp': '2025-09-30 22:44:04.279259', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:04.345753', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.14407087862491608, 'timestamp': '2025-09-30 22:44:04.348183', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:04.405842', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.09964260458946228, 'timestamp': '2025-09-30 22:44:04.409229', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:04.479855', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.05369683355093002, 'timestamp': '2025-09-30 22:44:04.486828', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:04.544275', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.10439231991767883, 'timestamp': '2025-09-30 22:44:04.546663', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:04.605198', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.10010527074337006, 'timestamp': '2025-09-30 22:44:04.607840', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:04.666113', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.1118948683142662, 'timestamp': '2025-09-30 22:44:04.668469', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:04.748586', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.041065335273742676, 'timestamp': '2025-09-30 22:44:04.758016', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:04.817576', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.04268241301178932, 'timestamp': '2025-09-30 22:44:04.820663', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:44:04.893774', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.12241019308567047, 'timestamp': '2025-09-30 22:44:04.896531', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:04.956908', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.08346395939588547, 'timestamp': '2025-09-30 22:44:04.963099', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:05.020294', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.11357057094573975, 'timestamp': '2025-09-30 22:44:05.026289', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:05.104506', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.16385257244110107, 'timestamp': '2025-09-30 22:44:05.109925', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:05.167974', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.0955347791314125, 'timestamp': '2025-09-30 22:44:05.170287', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:05.232318', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.05918162688612938, 'timestamp': '2025-09-30 22:44:05.237022', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.296755', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.055265504866838455, 'timestamp': '2025-09-30 22:44:05.302880', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.361055', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.05070104822516441, 'timestamp': '2025-09-30 22:44:05.364238', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.422444', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.06370918452739716, 'timestamp': '2025-09-30 22:44:05.425167', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.483838', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.06396674364805222, 'timestamp': '2025-09-30 22:44:05.486477', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:05.545096', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.06477859616279602, 'timestamp': '2025-09-30 22:44:05.551690', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:05.614522', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.10088783502578735, 'timestamp': '2025-09-30 22:44:05.617450', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:05.685269', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.05724361911416054, 'timestamp': '2025-09-30 22:44:05.688359', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.745590', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.14441940188407898, 'timestamp': '2025-09-30 22:44:05.750075', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.808790', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.09127213805913925, 'timestamp': '2025-09-30 22:44:05.817993', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.888711', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.12161511927843094, 'timestamp': '2025-09-30 22:44:05.891794', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:05.954499', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.07569373399019241, 'timestamp': '2025-09-30 22:44:05.960372', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:06.020525', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.05826340243220329, 'timestamp': '2025-09-30 22:44:06.025496', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:06.086522', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.09114784002304077, 'timestamp': '2025-09-30 22:44:06.096232', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:06.152571', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.06841693073511124, 'timestamp': '2025-09-30 22:44:06.159410', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.233822', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.10911069065332413, 'timestamp': '2025-09-30 22:44:06.236662', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.298425', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.0826151967048645, 'timestamp': '2025-09-30 22:44:06.301099', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.365456', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.0738397166132927, 'timestamp': '2025-09-30 22:44:06.372312', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.434779', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.12310533225536346, 'timestamp': '2025-09-30 22:44:06.441303', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:06.501031', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.07437356561422348, 'timestamp': '2025-09-30 22:44:06.503680', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:06.562254', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.18126894533634186, 'timestamp': '2025-09-30 22:44:06.565533', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.626893', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.056525200605392456, 'timestamp': '2025-09-30 22:44:06.633300', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:06.689670', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.22189833223819733, 'timestamp': '2025-09-30 22:44:06.692733', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:06.753269', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.14995889365673065, 'timestamp': '2025-09-30 22:44:06.757652', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:06.817204', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.13959020376205444, 'timestamp': '2025-09-30 22:44:06.820096', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:06.881689', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.03611403703689575, 'timestamp': '2025-09-30 22:44:06.891770', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:06.952527', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.07748361676931381, 'timestamp': '2025-09-30 22:44:06.956359', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.020300', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.06059037148952484, 'timestamp': '2025-09-30 22:44:07.030113', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:07.094125', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.11678508669137955, 'timestamp': '2025-09-30 22:44:07.097105', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:07.155125', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.1190914437174797, 'timestamp': '2025-09-30 22:44:07.161149', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:07.223502', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.05293959751725197, 'timestamp': '2025-09-30 22:44:07.229337', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:07.302097', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.1326589733362198, 'timestamp': '2025-09-30 22:44:07.304734', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:07.367245', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.04928676411509514, 'timestamp': '2025-09-30 22:44:07.371206', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.435787', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.016998931765556335, 'timestamp': '2025-09-30 22:44:07.442078', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.511790', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.060109518468379974, 'timestamp': '2025-09-30 22:44:07.516562', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:07.575773', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.05599484592676163, 'timestamp': '2025-09-30 22:44:07.583554', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.647194', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.10161743313074112, 'timestamp': '2025-09-30 22:44:07.650050', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:07.706973', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.13008862733840942, 'timestamp': '2025-09-30 22:44:07.715800', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:07.775146', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.045097898691892624, 'timestamp': '2025-09-30 22:44:07.780009', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:07.838564', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.08125649392604828, 'timestamp': '2025-09-30 22:44:07.840928', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.899832', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.12871527671813965, 'timestamp': '2025-09-30 22:44:07.902403', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:07.963872', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.10061874240636826, 'timestamp': '2025-09-30 22:44:07.973687', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:08.032967', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.04644047096371651, 'timestamp': '2025-09-30 22:44:08.037052', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.095849', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.1610569953918457, 'timestamp': '2025-09-30 22:44:08.102396', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:08.176027', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.09533378481864929, 'timestamp': '2025-09-30 22:44:08.182734', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.248892', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.06120135262608528, 'timestamp': '2025-09-30 22:44:08.255815', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.320130', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.12542955577373505, 'timestamp': '2025-09-30 22:44:08.324425', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.385438', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.06717005372047424, 'timestamp': '2025-09-30 22:44:08.388083', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:08.450588', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.055332448333501816, 'timestamp': '2025-09-30 22:44:08.456189', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.519602', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.086872898042202, 'timestamp': '2025-09-30 22:44:08.528810', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:08.595147', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.0664694681763649, 'timestamp': '2025-09-30 22:44:08.597700', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:08.680530', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.04681258648633957, 'timestamp': '2025-09-30 22:44:08.687015', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:08.771745', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.11417403072118759, 'timestamp': '2025-09-30 22:44:08.776261', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:08.860394', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.07178706675767899, 'timestamp': '2025-09-30 22:44:08.867387', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:08.933402', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.1009577065706253, 'timestamp': '2025-09-30 22:44:08.939370', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:08.997688', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.085275799036026, 'timestamp': '2025-09-30 22:44:09.000833', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:09.061873', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.19524072110652924, 'timestamp': '2025-09-30 22:44:09.064849', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:09.131075', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.11005032807588577, 'timestamp': '2025-09-30 22:44:09.142275', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:09.206706', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.0942874625325203, 'timestamp': '2025-09-30 22:44:09.212954', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:09.280106', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.10630761086940765, 'timestamp': '2025-09-30 22:44:09.291532', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:09.358451', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.06121112406253815, 'timestamp': '2025-09-30 22:44:09.361605', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:09.424234', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.11826871335506439, 'timestamp': '2025-09-30 22:44:09.432307', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:09.492865', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.03571971133351326, 'timestamp': '2025-09-30 22:44:09.498448', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:09.568190', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.1258941888809204, 'timestamp': '2025-09-30 22:44:09.570696', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:09.629992', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.0858403742313385, 'timestamp': '2025-09-30 22:44:09.633184', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:09.691104', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.08346305042505264, 'timestamp': '2025-09-30 22:44:09.698155', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:09.758698', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.1130022332072258, 'timestamp': '2025-09-30 22:44:09.761095', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:09.822341', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.08881207555532455, 'timestamp': '2025-09-30 22:44:09.825427', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:09.895730', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.09080095589160919, 'timestamp': '2025-09-30 22:44:09.898635', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:09.970359', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.06096329167485237, 'timestamp': '2025-09-30 22:44:09.983793', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:10.044200', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.09194988012313843, 'timestamp': '2025-09-30 22:44:10.059404', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:10.121634', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.13238608837127686, 'timestamp': '2025-09-30 22:44:10.128969', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:10.196778', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.03615584969520569, 'timestamp': '2025-09-30 22:44:10.202782', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:10.268985', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.19784849882125854, 'timestamp': '2025-09-30 22:44:10.281997', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:10.351237', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.1203671395778656, 'timestamp': '2025-09-30 22:44:10.353930', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:10.414763', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.19474747776985168, 'timestamp': '2025-09-30 22:44:10.422362', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:10.493445', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.14369924366474152, 'timestamp': '2025-09-30 22:44:10.497878', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:10.565979', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.05615692585706711, 'timestamp': '2025-09-30 22:44:10.574044', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:10.645184', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.08771118521690369, 'timestamp': '2025-09-30 22:44:10.653176', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:10.730756', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.0393228717148304, 'timestamp': '2025-09-30 22:44:10.734773', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:10.797009', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.05444229394197464, 'timestamp': '2025-09-30 22:44:10.800540', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:10.859753', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.10648693889379501, 'timestamp': '2025-09-30 22:44:10.866795', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:10.924604', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.08840102702379227, 'timestamp': '2025-09-30 22:44:10.927424', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:10.987523', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.0887674018740654, 'timestamp': '2025-09-30 22:44:10.994531', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:11.070521', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.08283444494009018, 'timestamp': '2025-09-30 22:44:11.079026', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:11.146714', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.10010150074958801, 'timestamp': '2025-09-30 22:44:11.159642', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:11.218377', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.08708050847053528, 'timestamp': '2025-09-30 22:44:11.221140', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:11.292718', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.06139533594250679, 'timestamp': '2025-09-30 22:44:11.295848', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:11.354981', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.13186150789260864, 'timestamp': '2025-09-30 22:44:11.362515', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:11.423872', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.1078391820192337, 'timestamp': '2025-09-30 22:44:11.430800', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:11.489691', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.06770401448011398, 'timestamp': '2025-09-30 22:44:11.496665', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:11.563225', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.1331566572189331, 'timestamp': '2025-09-30 22:44:11.570499', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:11.629447', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.019517358392477036, 'timestamp': '2025-09-30 22:44:11.632887', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:11.704506', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.07130476087331772, 'timestamp': '2025-09-30 22:44:11.710964', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:11.774893', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.06526756286621094, 'timestamp': '2025-09-30 22:44:11.783055', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:11.848080', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.09652051329612732, 'timestamp': '2025-09-30 22:44:11.856334', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:11.918019', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.10272807627916336, 'timestamp': '2025-09-30 22:44:11.926746', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:11.993634', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.024585727602243423, 'timestamp': '2025-09-30 22:44:12.000665', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:12.058911', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.18278665840625763, 'timestamp': '2025-09-30 22:44:12.068287', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:12.132369', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.11712039262056351, 'timestamp': '2025-09-30 22:44:12.135275', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:12.208182', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.07128491252660751, 'timestamp': '2025-09-30 22:44:12.211145', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:12.282987', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.01386168971657753, 'timestamp': '2025-09-30 22:44:12.289888', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:12.349129', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.12512056529521942, 'timestamp': '2025-09-30 22:44:12.352343', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:12.413369', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.1279846429824829, 'timestamp': '2025-09-30 22:44:12.416212', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:12.475429', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.05998701974749565, 'timestamp': '2025-09-30 22:44:12.478610', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:12.543342', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.08996251225471497, 'timestamp': '2025-09-30 22:44:12.555012', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:12.619064', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.08857747912406921, 'timestamp': '2025-09-30 22:44:12.621793', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:12.681499', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.0516703724861145, 'timestamp': '2025-09-30 22:44:12.684542', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:12.746095', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.16018083691596985, 'timestamp': '2025-09-30 22:44:12.753909', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:12.818745', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.0772339329123497, 'timestamp': '2025-09-30 22:44:12.825809', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:12.882571', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.07297654449939728, 'timestamp': '2025-09-30 22:44:12.885378', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:12.943364', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.088089220225811, 'timestamp': '2025-09-30 22:44:12.947209', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:13.023473', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.047816820442676544, 'timestamp': '2025-09-30 22:44:13.032243', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:13.104408', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.10932515561580658, 'timestamp': '2025-09-30 22:44:13.111135', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:13.177594', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.09909571707248688, 'timestamp': '2025-09-30 22:44:13.182557', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:13.248143', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.10265129059553146, 'timestamp': '2025-09-30 22:44:13.255759', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:13.319145', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.08756260573863983, 'timestamp': '2025-09-30 22:44:13.322416', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:13.390845', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.06746380776166916, 'timestamp': '2025-09-30 22:44:13.397588', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:13.455300', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.0475601889193058, 'timestamp': '2025-09-30 22:44:13.458536', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:13.519393', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.06423290818929672, 'timestamp': '2025-09-30 22:44:13.522526', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:13.581801', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.14173544943332672, 'timestamp': '2025-09-30 22:44:13.584847', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:13.641215', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.12526313960552216, 'timestamp': '2025-09-30 22:44:13.651468', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:13.719949', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.061635084450244904, 'timestamp': '2025-09-30 22:44:13.722230', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:13.779117', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.15093599259853363, 'timestamp': '2025-09-30 22:44:13.787053', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:13.853542', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.1074291467666626, 'timestamp': '2025-09-30 22:44:13.856295', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:13.913790', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.06491855531930923, 'timestamp': '2025-09-30 22:44:13.919953', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:13.976432', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.059691838920116425, 'timestamp': '2025-09-30 22:44:13.979338', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:14.056377', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.060136452317237854, 'timestamp': '2025-09-30 22:44:14.059534', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:14.116842', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.18437404930591583, 'timestamp': '2025-09-30 22:44:14.119482', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:44:14.183620', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.07491113245487213, 'timestamp': '2025-09-30 22:44:14.190160', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:14.256136', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.06916873902082443, 'timestamp': '2025-09-30 22:44:14.259351', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:14.326441', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.0838330090045929, 'timestamp': '2025-09-30 22:44:14.329497', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:14.390233', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.11640813946723938, 'timestamp': '2025-09-30 22:44:14.396216', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:14.460788', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.05545869097113609, 'timestamp': '2025-09-30 22:44:14.467699', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:14.530739', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.04829033836722374, 'timestamp': '2025-09-30 22:44:14.534815', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:14.597700', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.07852919399738312, 'timestamp': '2025-09-30 22:44:14.606654', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:14.682555', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.07166401296854019, 'timestamp': '2025-09-30 22:44:14.685330', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:14.751186', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.08847231417894363, 'timestamp': '2025-09-30 22:44:14.763509', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:14.822602', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.1176246851682663, 'timestamp': '2025-09-30 22:44:14.825781', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:14.883384', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.0838543251156807, 'timestamp': '2025-09-30 22:44:14.886521', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:14.950147', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.11262030899524689, 'timestamp': '2025-09-30 22:44:14.953208', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:15.021861', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.0717230886220932, 'timestamp': '2025-09-30 22:44:15.034057', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:15.098216', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.11677727848291397, 'timestamp': '2025-09-30 22:44:15.109239', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:15.171755', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.07936539500951767, 'timestamp': '2025-09-30 22:44:15.181716', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:15.253415', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.029588965699076653, 'timestamp': '2025-09-30 22:44:15.261289', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:15.323669', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.08508139848709106, 'timestamp': '2025-09-30 22:44:15.330355', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:15.387323', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.12990790605545044, 'timestamp': '2025-09-30 22:44:15.394742', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:15.458739', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.11379434168338776, 'timestamp': '2025-09-30 22:44:15.462355', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:15.539424', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.0892556756734848, 'timestamp': '2025-09-30 22:44:15.543117', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:15.621180', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.0988810732960701, 'timestamp': '2025-09-30 22:44:15.633741', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:15.712550', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.02243841253221035, 'timestamp': '2025-09-30 22:44:15.715866', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:15.781599', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.09129611402750015, 'timestamp': '2025-09-30 22:44:15.785339', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:15.846995', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.06555617600679398, 'timestamp': '2025-09-30 22:44:15.850252', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:15.922128', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.11596274375915527, 'timestamp': '2025-09-30 22:44:15.928568', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:15.986537', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.12861531972885132, 'timestamp': '2025-09-30 22:44:15.989981', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:16.047775', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.10586568713188171, 'timestamp': '2025-09-30 22:44:16.057934', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.116746', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.13635757565498352, 'timestamp': '2025-09-30 22:44:16.120180', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:16.178200', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.13825684785842896, 'timestamp': '2025-09-30 22:44:16.195902', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:16.262558', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.10819610208272934, 'timestamp': '2025-09-30 22:44:16.265277', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:16.323312', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.08646399527788162, 'timestamp': '2025-09-30 22:44:16.325914', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:16.388075', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.0918436273932457, 'timestamp': '2025-09-30 22:44:16.391749', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.451829', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.10080574452877045, 'timestamp': '2025-09-30 22:44:16.458656', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:16.516837', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.10933248698711395, 'timestamp': '2025-09-30 22:44:16.520080', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:16.577989', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.10048729926347733, 'timestamp': '2025-09-30 22:44:16.581451', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.639731', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.07864373922348022, 'timestamp': '2025-09-30 22:44:16.642498', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.700350', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.12365405261516571, 'timestamp': '2025-09-30 22:44:16.707353', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:16.790657', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.09989191591739655, 'timestamp': '2025-09-30 22:44:16.793255', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:16.858685', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.1112094596028328, 'timestamp': '2025-09-30 22:44:16.866635', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.928588', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.1329062283039093, 'timestamp': '2025-09-30 22:44:16.935209', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:16.999082', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.11678656935691833, 'timestamp': '2025-09-30 22:44:17.006328', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.071397', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.07187197357416153, 'timestamp': '2025-09-30 22:44:17.074469', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:17.138079', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.1023804321885109, 'timestamp': '2025-09-30 22:44:17.140767', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.205351', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.10677579045295715, 'timestamp': '2025-09-30 22:44:17.209456', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.271524', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.10345935076475143, 'timestamp': '2025-09-30 22:44:17.277877', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:17.337910', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.09122326225042343, 'timestamp': '2025-09-30 22:44:17.339843', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:17.401887', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.11147302389144897, 'timestamp': '2025-09-30 22:44:17.405268', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:17.465447', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.11833003908395767, 'timestamp': '2025-09-30 22:44:17.473892', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:17.545681', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.0719253271818161, 'timestamp': '2025-09-30 22:44:17.552100', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:44:17.609382', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.05317959934473038, 'timestamp': '2025-09-30 22:44:17.613147', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:17.670536', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.08799712359905243, 'timestamp': '2025-09-30 22:44:17.673386', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.732989', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.06171911582350731, 'timestamp': '2025-09-30 22:44:17.739235', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.804088', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.10177260637283325, 'timestamp': '2025-09-30 22:44:17.812024', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:17.871089', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.08476340770721436, 'timestamp': '2025-09-30 22:44:17.876765', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:17.933751', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.06751607358455658, 'timestamp': '2025-09-30 22:44:17.944021', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:18.002715', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.1174798533320427, 'timestamp': '2025-09-30 22:44:18.015104', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.076216', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.06564919650554657, 'timestamp': '2025-09-30 22:44:18.089527', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:18.154110', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.18969187140464783, 'timestamp': '2025-09-30 22:44:18.156917', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.213727', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.03221778944134712, 'timestamp': '2025-09-30 22:44:18.223404', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:18.286246', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.06753303855657578, 'timestamp': '2025-09-30 22:44:18.289539', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:18.348528', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.06408809125423431, 'timestamp': '2025-09-30 22:44:18.361381', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:18.426745', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.062448449432849884, 'timestamp': '2025-09-30 22:44:18.436104', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.495732', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.04529356211423874, 'timestamp': '2025-09-30 22:44:18.502679', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.561217', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.05717498064041138, 'timestamp': '2025-09-30 22:44:18.567336', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.631239', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.13995423913002014, 'timestamp': '2025-09-30 22:44:18.638691', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.701203', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.0979221984744072, 'timestamp': '2025-09-30 22:44:18.710204', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:18.773745', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.16376307606697083, 'timestamp': '2025-09-30 22:44:18.776775', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:18.837975', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.06111278384923935, 'timestamp': '2025-09-30 22:44:18.850975', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:18.918085', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.014518603682518005, 'timestamp': '2025-09-30 22:44:18.929796', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:18.988369', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.09485968202352524, 'timestamp': '2025-09-30 22:44:18.991120', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:19.063221', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.1112477257847786, 'timestamp': '2025-09-30 22:44:19.066850', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:19.127660', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.08390016108751297, 'timestamp': '2025-09-30 22:44:19.131937', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:19.192364', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.01831660605967045, 'timestamp': '2025-09-30 22:44:19.199978', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:19.264417', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.07133182883262634, 'timestamp': '2025-09-30 22:44:19.272122', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:19.346439', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.11162275820970535, 'timestamp': '2025-09-30 22:44:19.349743', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:19.417479', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.0852598249912262, 'timestamp': '2025-09-30 22:44:19.426490', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:19.484629', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.09623006731271744, 'timestamp': '2025-09-30 22:44:19.495896', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:19.555232', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.04855211451649666, 'timestamp': '2025-09-30 22:44:19.568035', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:19.636506', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.0915679931640625, 'timestamp': '2025-09-30 22:44:19.640251', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:19.699863', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.07343856245279312, 'timestamp': '2025-09-30 22:44:19.703272', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:19.774817', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.03711182624101639, 'timestamp': '2025-09-30 22:44:19.781110', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-09-30 22:44:20.220529', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:20.289906', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.12147808074951172, 'timestamp': '2025-09-30 22:44:20.300996', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:20.379244', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.2183988094329834, 'timestamp': '2025-09-30 22:44:20.382742', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:20.455190', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.07953759282827377, 'timestamp': '2025-09-30 22:44:20.464567', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:20.527715', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.035918667912483215, 'timestamp': '2025-09-30 22:44:20.534533', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:20.592566', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.07131651043891907, 'timestamp': '2025-09-30 22:44:20.604941', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:20.684460', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.07551077008247375, 'timestamp': '2025-09-30 22:44:20.688300', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:20.748520', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.1545793116092682, 'timestamp': '2025-09-30 22:44:20.756846', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:20.831132', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.14680014550685883, 'timestamp': '2025-09-30 22:44:20.838242', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:20.896451', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.05952724069356918, 'timestamp': '2025-09-30 22:44:20.899848', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:20.958745', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.13446418941020966, 'timestamp': '2025-09-30 22:44:20.962381', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:21.022136', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.15546363592147827, 'timestamp': '2025-09-30 22:44:21.025571', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:21.085622', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.10511573404073715, 'timestamp': '2025-09-30 22:44:21.099369', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.170066', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.01970202848315239, 'timestamp': '2025-09-30 22:44:21.177510', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.239763', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.08337350934743881, 'timestamp': '2025-09-30 22:44:21.244559', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.313574', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.16108593344688416, 'timestamp': '2025-09-30 22:44:21.317485', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.376828', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.1611948311328888, 'timestamp': '2025-09-30 22:44:21.393629', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.459385', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.09591847658157349, 'timestamp': '2025-09-30 22:44:21.463229', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:21.525724', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.06812145560979843, 'timestamp': '2025-09-30 22:44:21.535127', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.596370', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.1342369019985199, 'timestamp': '2025-09-30 22:44:21.599512', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:21.664482', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.10373406112194061, 'timestamp': '2025-09-30 22:44:21.671108', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:21.727959', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.014110236428678036, 'timestamp': '2025-09-30 22:44:21.731894', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:21.799347', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.10638745874166489, 'timestamp': '2025-09-30 22:44:21.803834', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:21.890125', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.18052668869495392, 'timestamp': '2025-09-30 22:44:21.895033', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:21.967516', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.1398337483406067, 'timestamp': '2025-09-30 22:44:21.975097', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:22.047642', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.08685708045959473, 'timestamp': '2025-09-30 22:44:22.051034', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:22.126050', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.055333591997623444, 'timestamp': '2025-09-30 22:44:22.129453', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:22.202694', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.0742739737033844, 'timestamp': '2025-09-30 22:44:22.205349', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:22.265008', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.0703866109251976, 'timestamp': '2025-09-30 22:44:22.272362', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:22.336826', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.1804468035697937, 'timestamp': '2025-09-30 22:44:22.340270', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:22.422331', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.16293616592884064, 'timestamp': '2025-09-30 22:44:22.425888', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:22.484372', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.06397043168544769, 'timestamp': '2025-09-30 22:44:22.488637', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:22.562976', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.08814690262079239, 'timestamp': '2025-09-30 22:44:22.570754', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:22.629637', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.085098035633564, 'timestamp': '2025-09-30 22:44:22.632998', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:22.691976', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.14149002730846405, 'timestamp': '2025-09-30 22:44:22.696040', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:22.753204', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.13497985899448395, 'timestamp': '2025-09-30 22:44:22.756237', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:22.814287', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.09861196577548981, 'timestamp': '2025-09-30 22:44:22.821773', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:22.886141', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.10871243476867676, 'timestamp': '2025-09-30 22:44:22.895948', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:22.954439', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.12420420348644257, 'timestamp': '2025-09-30 22:44:22.958438', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:44:39.192253', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 9538.986235796534, 'timestamp': '2025-09-30 22:44:39.196354', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:39.259836', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.08651819825172424, 'timestamp': '2025-09-30 22:44:39.263067', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:39.322415', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.1521557718515396, 'timestamp': '2025-09-30 22:44:39.337060', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:39.394930', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.1062365174293518, 'timestamp': '2025-09-30 22:44:39.397343', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:39.456506', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.09282499551773071, 'timestamp': '2025-09-30 22:44:39.463931', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:39.522261', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.07846604287624359, 'timestamp': '2025-09-30 22:44:39.525120', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:39.583507', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.07826227694749832, 'timestamp': '2025-09-30 22:44:39.590724', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:39.656099', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.05441842973232269, 'timestamp': '2025-09-30 22:44:39.660609', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:39.719922', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.08440715819597244, 'timestamp': '2025-09-30 22:44:39.722396', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:39.780214', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.0541238971054554, 'timestamp': '2025-09-30 22:44:39.783614', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:39.841077', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.12682762742042542, 'timestamp': '2025-09-30 22:44:39.851950', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:39.912156', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.04856892302632332, 'timestamp': '2025-09-30 22:44:39.916016', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:39.972745', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.10363850742578506, 'timestamp': '2025-09-30 22:44:39.975063', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.032307', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.04135195538401604, 'timestamp': '2025-09-30 22:44:40.036296', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.105660', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.03163996711373329, 'timestamp': '2025-09-30 22:44:40.111826', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:40.175581', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.0953095406293869, 'timestamp': '2025-09-30 22:44:40.180559', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:40.240529', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.07457326352596283, 'timestamp': '2025-09-30 22:44:40.242909', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:40.301325', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.06873851269483566, 'timestamp': '2025-09-30 22:44:40.308419', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.368074', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.13089774549007416, 'timestamp': '2025-09-30 22:44:40.374269', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.430954', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.06998729705810547, 'timestamp': '2025-09-30 22:44:40.433560', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:40.491060', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.12901179492473602, 'timestamp': '2025-09-30 22:44:40.493877', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:40.552544', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.215809628367424, 'timestamp': '2025-09-30 22:44:40.554754', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:40.612078', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.10702722519636154, 'timestamp': '2025-09-30 22:44:40.618365', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.675544', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.1255861073732376, 'timestamp': '2025-09-30 22:44:40.678309', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:40.735302', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.0788019597530365, 'timestamp': '2025-09-30 22:44:40.738344', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:40.796298', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.05924142152070999, 'timestamp': '2025-09-30 22:44:40.798872', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.867550', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.0739288479089737, 'timestamp': '2025-09-30 22:44:40.873536', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:40.932352', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.06116103008389473, 'timestamp': '2025-09-30 22:44:40.936037', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:40.998144', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.0880577564239502, 'timestamp': '2025-09-30 22:44:41.004046', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:41.077175', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.0731586292386055, 'timestamp': '2025-09-30 22:44:41.081975', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:41.142079', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.1313621699810028, 'timestamp': '2025-09-30 22:44:41.152420', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:41.224782', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.00913478434085846, 'timestamp': '2025-09-30 22:44:41.232952', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:41.293900', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.1237267553806305, 'timestamp': '2025-09-30 22:44:41.296818', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:41.366798', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.056215669959783554, 'timestamp': '2025-09-30 22:44:41.369337', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:41.431124', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.10452868789434433, 'timestamp': '2025-09-30 22:44:41.437364', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:41.507169', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.09934544563293457, 'timestamp': '2025-09-30 22:44:41.509901', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:41.567558', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.08387503772974014, 'timestamp': '2025-09-30 22:44:41.570229', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:41.639526', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.07881642132997513, 'timestamp': '2025-09-30 22:44:41.644771', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:41.701662', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.10016055405139923, 'timestamp': '2025-09-30 22:44:41.708600', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:41.764840', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.07680315524339676, 'timestamp': '2025-09-30 22:44:41.767592', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:41.833825', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.047959860414266586, 'timestamp': '2025-09-30 22:44:41.836547', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:41.894360', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.06427045911550522, 'timestamp': '2025-09-30 22:44:41.899652', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:41.956304', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.1172366663813591, 'timestamp': '2025-09-30 22:44:41.962678', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:42.031924', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.03473983705043793, 'timestamp': '2025-09-30 22:44:42.037383', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:42.096770', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.0863250270485878, 'timestamp': '2025-09-30 22:44:42.099471', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:42.157979', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.1136406734585762, 'timestamp': '2025-09-30 22:44:42.160965', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:42.243224', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.12120997160673141, 'timestamp': '2025-09-30 22:44:42.256718', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:42.328325', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.12969782948493958, 'timestamp': '2025-09-30 22:44:42.332638', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:42.391300', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.08497676998376846, 'timestamp': '2025-09-30 22:44:42.393939', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:42.464881', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.13642726838588715, 'timestamp': '2025-09-30 22:44:42.468737', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:42.532822', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.05111358314752579, 'timestamp': '2025-09-30 22:44:42.542123', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:42.598817', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.1588156670331955, 'timestamp': '2025-09-30 22:44:42.601292', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:42.661582', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.07067789882421494, 'timestamp': '2025-09-30 22:44:42.676397', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:42.734463', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.12573093175888062, 'timestamp': '2025-09-30 22:44:42.737687', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:42.795538', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.1386992484331131, 'timestamp': '2025-09-30 22:44:42.802097', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:42.879461', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.02678867056965828, 'timestamp': '2025-09-30 22:44:42.885277', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:42.951143', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.07985378056764603, 'timestamp': '2025-09-30 22:44:42.954680', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:44:43.017818', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.09295818954706192, 'timestamp': '2025-09-30 22:44:43.021889', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:43.082486', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.08511601388454437, 'timestamp': '2025-09-30 22:44:43.088649', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:44:43.159060', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.060254402458667755, 'timestamp': '2025-09-30 22:44:43.172380', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:43.237978', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.029477624222636223, 'timestamp': '2025-09-30 22:44:43.251323', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:43.317701', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.11015821993350983, 'timestamp': '2025-09-30 22:44:43.329824', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:43.387978', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.09479686617851257, 'timestamp': '2025-09-30 22:44:43.396048', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:43.466177', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.09033682942390442, 'timestamp': '2025-09-30 22:44:43.469996', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:43.533665', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.13598547875881195, 'timestamp': '2025-09-30 22:44:43.548418', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:43.610444', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.10937602818012238, 'timestamp': '2025-09-30 22:44:43.616093', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:43.682975', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.08702345937490463, 'timestamp': '2025-09-30 22:44:43.690402', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:43.747623', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.08868037909269333, 'timestamp': '2025-09-30 22:44:43.751270', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:43.814146', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.12295009195804596, 'timestamp': '2025-09-30 22:44:43.819877', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:43.879371', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.07209727168083191, 'timestamp': '2025-09-30 22:44:43.882908', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:43.941936', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.16093918681144714, 'timestamp': '2025-09-30 22:44:43.949188', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.006830', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.12255271524190903, 'timestamp': '2025-09-30 22:44:44.015941', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:44.077620', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.051364973187446594, 'timestamp': '2025-09-30 22:44:44.080501', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.137922', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.07938428968191147, 'timestamp': '2025-09-30 22:44:44.149115', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:44.208175', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.09796518832445145, 'timestamp': '2025-09-30 22:44:44.214516', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:44.276055', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.05982454493641853, 'timestamp': '2025-09-30 22:44:44.283176', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.344235', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.08762334287166595, 'timestamp': '2025-09-30 22:44:44.347365', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:44.413117', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.04412874951958656, 'timestamp': '2025-09-30 22:44:44.416403', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.485701', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.06486412137746811, 'timestamp': '2025-09-30 22:44:44.491828', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:44.551288', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.17364363372325897, 'timestamp': '2025-09-30 22:44:44.554898', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.613423', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.10506554692983627, 'timestamp': '2025-09-30 22:44:44.618782', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:44.678382', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.07410231232643127, 'timestamp': '2025-09-30 22:44:44.683287', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:44.746405', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.16045106947422028, 'timestamp': '2025-09-30 22:44:44.754815', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:44.813531', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.16135768592357635, 'timestamp': '2025-09-30 22:44:44.819390', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:44.880433', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.10196268558502197, 'timestamp': '2025-09-30 22:44:44.885903', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:44:44.943443', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.09105244278907776, 'timestamp': '2025-09-30 22:44:44.948196', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:45.011221', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.07537337392568588, 'timestamp': '2025-09-30 22:44:45.019382', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.085191', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.046776603907346725, 'timestamp': '2025-09-30 22:44:45.088188', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:45.160426', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.06548315286636353, 'timestamp': '2025-09-30 22:44:45.164899', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:45.239299', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.09569396078586578, 'timestamp': '2025-09-30 22:44:45.242030', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:45.301275', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.09477292001247406, 'timestamp': '2025-09-30 22:44:45.307887', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.367548', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.12331834435462952, 'timestamp': '2025-09-30 22:44:45.372528', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.437538', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.10710737109184265, 'timestamp': '2025-09-30 22:44:45.443056', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:45.502799', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.03903207927942276, 'timestamp': '2025-09-30 22:44:45.505863', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.568595', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.0906405970454216, 'timestamp': '2025-09-30 22:44:45.575886', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.634127', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.060759078711271286, 'timestamp': '2025-09-30 22:44:45.650159', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:45.709975', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.17081807553768158, 'timestamp': '2025-09-30 22:44:45.717091', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:45.780419', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.06938032060861588, 'timestamp': '2025-09-30 22:44:45.782996', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.842009', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.09243913739919662, 'timestamp': '2025-09-30 22:44:45.848125', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:45.904817', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.051079053431749344, 'timestamp': '2025-09-30 22:44:45.909193', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:45.970231', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.08669065684080124, 'timestamp': '2025-09-30 22:44:45.972811', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:46.034914', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.07444587349891663, 'timestamp': '2025-09-30 22:44:46.039907', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:46.098165', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.11128832399845123, 'timestamp': '2025-09-30 22:44:46.104039', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:46.168721', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.18216317892074585, 'timestamp': '2025-09-30 22:44:46.173251', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:46.234095', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.14409123361110687, 'timestamp': '2025-09-30 22:44:46.238668', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:46.307099', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.03589528799057007, 'timestamp': '2025-09-30 22:44:46.309673', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:46.380075', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.09082724899053574, 'timestamp': '2025-09-30 22:44:46.386403', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:46.452711', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.06441058963537216, 'timestamp': '2025-09-30 22:44:46.455247', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:46.534847', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.05521489307284355, 'timestamp': '2025-09-30 22:44:46.538844', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:46.621307', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.11860349774360657, 'timestamp': '2025-09-30 22:44:46.633092', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:46.707740', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.06748218089342117, 'timestamp': '2025-09-30 22:44:46.714401', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:46.774222', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.11515721678733826, 'timestamp': '2025-09-30 22:44:46.776806', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:46.839952', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.12486100941896439, 'timestamp': '2025-09-30 22:44:46.850094', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:46.914693', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.031148405745625496, 'timestamp': '2025-09-30 22:44:46.919295', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:46.982232', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.04378809034824371, 'timestamp': '2025-09-30 22:44:46.992258', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.070856', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.02720915898680687, 'timestamp': '2025-09-30 22:44:47.076860', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:47.143032', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.13857877254486084, 'timestamp': '2025-09-30 22:44:47.148124', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.220725', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.03325508534908295, 'timestamp': '2025-09-30 22:44:47.238769', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:47.311727', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.0685223639011383, 'timestamp': '2025-09-30 22:44:47.318283', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:47.376734', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.07162893563508987, 'timestamp': '2025-09-30 22:44:47.379752', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:47.444186', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.08326848596334457, 'timestamp': '2025-09-30 22:44:47.450748', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.517658', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.1114625409245491, 'timestamp': '2025-09-30 22:44:47.521904', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:47.580649', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.11320634931325912, 'timestamp': '2025-09-30 22:44:47.588549', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:47.658973', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.08098820596933365, 'timestamp': '2025-09-30 22:44:47.670535', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.729841', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.07677698135375977, 'timestamp': '2025-09-30 22:44:47.733333', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.791632', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.04693975672125816, 'timestamp': '2025-09-30 22:44:47.794533', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:47.866903', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.09894318878650665, 'timestamp': '2025-09-30 22:44:47.875311', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:47.940059', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.03350703418254852, 'timestamp': '2025-09-30 22:44:47.944448', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.015348', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.0624769926071167, 'timestamp': '2025-09-30 22:44:48.018009', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:48.080376', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.07737515866756439, 'timestamp': '2025-09-30 22:44:48.082988', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:48.141799', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.0805918499827385, 'timestamp': '2025-09-30 22:44:48.148595', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.209566', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.03808218985795975, 'timestamp': '2025-09-30 22:44:48.212784', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:48.275685', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.13381020724773407, 'timestamp': '2025-09-30 22:44:48.291129', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.363809', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.0614263154566288, 'timestamp': '2025-09-30 22:44:48.367783', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:48.437026', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.08820292353630066, 'timestamp': '2025-09-30 22:44:48.448923', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.520277', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.10922212153673172, 'timestamp': '2025-09-30 22:44:48.529391', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.599901', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.11137678474187851, 'timestamp': '2025-09-30 22:44:48.609234', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:48.675357', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.12234953790903091, 'timestamp': '2025-09-30 22:44:48.684270', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:48.745903', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.10439779609441757, 'timestamp': '2025-09-30 22:44:48.753674', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:48.820930', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.08905572444200516, 'timestamp': '2025-09-30 22:44:48.824275', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:48.884213', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.06081728637218475, 'timestamp': '2025-09-30 22:44:48.892568', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:48.952382', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.042119793593883514, 'timestamp': '2025-09-30 22:44:48.955274', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.027450', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.06202346831560135, 'timestamp': '2025-09-30 22:44:49.039092', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:49.103911', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.035505764186382294, 'timestamp': '2025-09-30 22:44:49.108012', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.165483', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.04877260699868202, 'timestamp': '2025-09-30 22:44:49.174046', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:49.251092', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.041641175746917725, 'timestamp': '2025-09-30 22:44:49.253825', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:49.313730', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.053279709070920944, 'timestamp': '2025-09-30 22:44:49.320005', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:49.377075', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.13564155995845795, 'timestamp': '2025-09-30 22:44:49.385996', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:49.451566', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.1018015593290329, 'timestamp': '2025-09-30 22:44:49.454296', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:49.512607', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.10501354932785034, 'timestamp': '2025-09-30 22:44:49.515336', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.587423', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.032468296587467194, 'timestamp': '2025-09-30 22:44:49.594395', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:49.655828', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.09478811919689178, 'timestamp': '2025-09-30 22:44:49.664665', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.724828', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.09537559747695923, 'timestamp': '2025-09-30 22:44:49.730348', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.788344', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.10853103548288345, 'timestamp': '2025-09-30 22:44:49.792885', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:49.865802', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.026980256661772728, 'timestamp': '2025-09-30 22:44:49.873424', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:49.932115', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.14804023504257202, 'timestamp': '2025-09-30 22:44:49.933943', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:49.998787', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.149910107254982, 'timestamp': '2025-09-30 22:44:50.004040', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:50.095860', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.15248286724090576, 'timestamp': '2025-09-30 22:44:50.099782', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:50.170118', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.08650916069746017, 'timestamp': '2025-09-30 22:44:50.183583', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:50.259428', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.04331028088927269, 'timestamp': '2025-09-30 22:44:50.263755', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:50.328841', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.07006163895130157, 'timestamp': '2025-09-30 22:44:50.332567', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:50.411021', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.23441648483276367, 'timestamp': '2025-09-30 22:44:50.425076', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:50.506688', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.0706806629896164, 'timestamp': '2025-09-30 22:44:50.521753', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:50.601664', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.1498478502035141, 'timestamp': '2025-09-30 22:44:50.608683', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:50.667970', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.1705629676580429, 'timestamp': '2025-09-30 22:44:50.672062', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:50.728577', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.07761158794164658, 'timestamp': '2025-09-30 22:44:50.733087', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:50.822855', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.0583445243537426, 'timestamp': '2025-09-30 22:44:50.829919', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:50.904301', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.12416160106658936, 'timestamp': '2025-09-30 22:44:50.909189', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:51.008125', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.21668031811714172, 'timestamp': '2025-09-30 22:44:51.017478', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:51.111110', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.0413353256881237, 'timestamp': '2025-09-30 22:44:51.115441', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:51.205073', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.05928569659590721, 'timestamp': '2025-09-30 22:44:51.212273', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:51.303314', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.11216799169778824, 'timestamp': '2025-09-30 22:44:51.308398', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:51.394909', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.08566485345363617, 'timestamp': '2025-09-30 22:44:51.408533', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:51.480929', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.11117680370807648, 'timestamp': '2025-09-30 22:44:51.483457', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:51.564119', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.16694337129592896, 'timestamp': '2025-09-30 22:44:51.571220', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:51.658945', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.16331464052200317, 'timestamp': '2025-09-30 22:44:51.662432', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:51.721537', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.05311097577214241, 'timestamp': '2025-09-30 22:44:51.724926', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:51.800145', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.04071401059627533, 'timestamp': '2025-09-30 22:44:51.809873', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:51.872202', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.09347672760486603, 'timestamp': '2025-09-30 22:44:51.878506', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:51.951296', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.08740349113941193, 'timestamp': '2025-09-30 22:44:51.959580', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.024770', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.12955322861671448, 'timestamp': '2025-09-30 22:44:52.027932', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:52.090339', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.1121426522731781, 'timestamp': '2025-09-30 22:44:52.094871', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:52.153747', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.0989324077963829, 'timestamp': '2025-09-30 22:44:52.161276', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.231601', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.0783199593424797, 'timestamp': '2025-09-30 22:44:52.247460', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:52.308417', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.09975484013557434, 'timestamp': '2025-09-30 22:44:52.311766', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.370587', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.13566173613071442, 'timestamp': '2025-09-30 22:44:52.373383', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.437990', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.032073725014925, 'timestamp': '2025-09-30 22:44:52.444681', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.505564', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.04555955529212952, 'timestamp': '2025-09-30 22:44:52.518356', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:52.579310', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.08410656452178955, 'timestamp': '2025-09-30 22:44:52.583098', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.651529', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.06635071337223053, 'timestamp': '2025-09-30 22:44:52.655513', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:52.713972', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.10625948756933212, 'timestamp': '2025-09-30 22:44:52.721888', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.782153', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.0968458503484726, 'timestamp': '2025-09-30 22:44:52.786088', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:52.847185', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.07360103726387024, 'timestamp': '2025-09-30 22:44:52.851187', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:52.911363', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.04387315735220909, 'timestamp': '2025-09-30 22:44:52.916230', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:52.976194', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.09469803422689438, 'timestamp': '2025-09-30 22:44:52.990355', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:53.048619', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.13643677532672882, 'timestamp': '2025-09-30 22:44:53.053641', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:53.113431', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.1878739893436432, 'timestamp': '2025-09-30 22:44:53.117527', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.182152', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.08630923926830292, 'timestamp': '2025-09-30 22:44:53.185911', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.246210', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.1716683804988861, 'timestamp': '2025-09-30 22:44:53.253173', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:53.318328', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.05728089064359665, 'timestamp': '2025-09-30 22:44:53.320572', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:53.379945', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.046064119786024094, 'timestamp': '2025-09-30 22:44:53.386228', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:53.452438', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.07693465799093246, 'timestamp': '2025-09-30 22:44:53.456164', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:53.537521', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.10932106524705887, 'timestamp': '2025-09-30 22:44:53.552186', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.619430', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.05874301865696907, 'timestamp': '2025-09-30 22:44:53.622979', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:53.713926', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.0661529004573822, 'timestamp': '2025-09-30 22:44:53.729392', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.802824', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.06530693918466568, 'timestamp': '2025-09-30 22:44:53.808544', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.874189', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.17273834347724915, 'timestamp': '2025-09-30 22:44:53.898852', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:53.977729', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.07879283279180527, 'timestamp': '2025-09-30 22:44:53.996027', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.074281', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.04939916357398033, 'timestamp': '2025-09-30 22:44:54.091578', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.165709', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.09957008063793182, 'timestamp': '2025-09-30 22:44:54.171206', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.235014', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.06342846900224686, 'timestamp': '2025-09-30 22:44:54.243695', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:54.305002', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.07470466196537018, 'timestamp': '2025-09-30 22:44:54.309250', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:44:54.382260', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.07414410263299942, 'timestamp': '2025-09-30 22:44:54.388271', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.453944', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.10316642373800278, 'timestamp': '2025-09-30 22:44:54.456289', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:54.517460', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.11190719157457352, 'timestamp': '2025-09-30 22:44:54.526834', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.605993', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.09636586904525757, 'timestamp': '2025-09-30 22:44:54.611781', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:54.681230', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.01598946936428547, 'timestamp': '2025-09-30 22:44:54.684448', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.751264', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.18422168493270874, 'timestamp': '2025-09-30 22:44:54.755938', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:54.832665', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.11317536979913712, 'timestamp': '2025-09-30 22:44:54.841100', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.899925', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.052991341799497604, 'timestamp': '2025-09-30 22:44:54.902251', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:54.989560', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.08804389834403992, 'timestamp': '2025-09-30 22:44:54.993831', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:55.060393', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.11659014225006104, 'timestamp': '2025-09-30 22:44:55.063549', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.123446', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.05868830531835556, 'timestamp': '2025-09-30 22:44:55.131181', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:55.190131', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.06477202475070953, 'timestamp': '2025-09-30 22:44:55.193644', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.266572', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.015386002138257027, 'timestamp': '2025-09-30 22:44:55.269788', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.329767', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.08764561265707016, 'timestamp': '2025-09-30 22:44:55.340870', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.401974', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.12415862083435059, 'timestamp': '2025-09-30 22:44:55.416041', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:55.475802', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.07970864325761795, 'timestamp': '2025-09-30 22:44:55.479125', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:55.539724', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.10343990474939346, 'timestamp': '2025-09-30 22:44:55.552057', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:55.638963', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.13546833395957947, 'timestamp': '2025-09-30 22:44:55.641646', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:55.704245', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.13132773339748383, 'timestamp': '2025-09-30 22:44:55.712561', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.773903', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.10121119767427444, 'timestamp': '2025-09-30 22:44:55.777510', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:55.842836', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.14155252277851105, 'timestamp': '2025-09-30 22:44:55.852713', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:55.918655', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.1100236177444458, 'timestamp': '2025-09-30 22:44:55.922890', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:55.986977', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.13774573802947998, 'timestamp': '2025-09-30 22:44:55.994230', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.051773', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.03889841213822365, 'timestamp': '2025-09-30 22:44:56.062204', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:56.127140', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.0791991576552391, 'timestamp': '2025-09-30 22:44:56.138323', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:56.223911', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.10838738083839417, 'timestamp': '2025-09-30 22:44:56.227791', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:56.288202', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.15853406488895416, 'timestamp': '2025-09-30 22:44:56.296075', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.354820', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.09458862245082855, 'timestamp': '2025-09-30 22:44:56.358289', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:56.416589', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.1015702337026596, 'timestamp': '2025-09-30 22:44:56.419271', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.477795', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.04161498695611954, 'timestamp': '2025-09-30 22:44:56.480251', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:56.539079', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.10052410513162613, 'timestamp': '2025-09-30 22:44:56.549003', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.607482', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.07112045586109161, 'timestamp': '2025-09-30 22:44:56.610757', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.669602', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.09549757838249207, 'timestamp': '2025-09-30 22:44:56.672123', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.730933', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.11366795003414154, 'timestamp': '2025-09-30 22:44:56.746593', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:56.808115', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.09869897365570068, 'timestamp': '2025-09-30 22:44:56.821827', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:56.884834', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.06186051666736603, 'timestamp': '2025-09-30 22:44:56.892347', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:56.962054', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.13560254871845245, 'timestamp': '2025-09-30 22:44:56.979803', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.042813', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.04907461628317833, 'timestamp': '2025-09-30 22:44:57.053006', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.113444', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.04933584854006767, 'timestamp': '2025-09-30 22:44:57.127183', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:57.188773', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.07802039384841919, 'timestamp': '2025-09-30 22:44:57.192120', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:57.257154', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.05312555655837059, 'timestamp': '2025-09-30 22:44:57.260979', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:44:57.319531', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.05951350927352905, 'timestamp': '2025-09-30 22:44:57.328659', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.393410', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.1455385386943817, 'timestamp': '2025-09-30 22:44:57.407157', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:57.480788', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.07684396207332611, 'timestamp': '2025-09-30 22:44:57.484089', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.552130', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.07670570909976959, 'timestamp': '2025-09-30 22:44:57.561858', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:57.620966', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.036630116403102875, 'timestamp': '2025-09-30 22:44:57.623833', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.685881', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.03810770437121391, 'timestamp': '2025-09-30 22:44:57.693660', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:57.757537', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.06460981070995331, 'timestamp': '2025-09-30 22:44:57.760635', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:57.829959', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.05049276351928711, 'timestamp': '2025-09-30 22:44:57.837913', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:57.917780', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.09316477924585342, 'timestamp': '2025-09-30 22:44:57.920959', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:57.983414', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.058270134031772614, 'timestamp': '2025-09-30 22:44:57.990761', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:58.050254', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.08288801461458206, 'timestamp': '2025-09-30 22:44:58.056938', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:58.116509', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.0753215029835701, 'timestamp': '2025-09-30 22:44:58.123905', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:58.189021', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.03958384692668915, 'timestamp': '2025-09-30 22:44:58.191712', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:58.275470', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.09685254096984863, 'timestamp': '2025-09-30 22:44:58.282749', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:58.343335', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.05212876945734024, 'timestamp': '2025-09-30 22:44:58.346690', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:58.408793', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.050799738615751266, 'timestamp': '2025-09-30 22:44:58.412082', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:58.472071', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.06647688150405884, 'timestamp': '2025-09-30 22:44:58.478570', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:58.540015', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.10135269910097122, 'timestamp': '2025-09-30 22:44:58.552415', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:58.609599', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.11689440160989761, 'timestamp': '2025-09-30 22:44:58.621510', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:58.681970', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.15431921184062958, 'timestamp': '2025-09-30 22:44:58.686090', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:58.747283', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.06526980549097061, 'timestamp': '2025-09-30 22:44:58.753221', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:58.814378', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.11824209988117218, 'timestamp': '2025-09-30 22:44:58.822983', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:58.906540', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.04421605169773102, 'timestamp': '2025-09-30 22:44:58.912780', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:59.003356', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.04696546867489815, 'timestamp': '2025-09-30 22:44:59.006986', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.065976', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.13501399755477905, 'timestamp': '2025-09-30 22:44:59.070295', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:59.128222', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.10257839411497116, 'timestamp': '2025-09-30 22:44:59.139012', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.200047', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.07204829156398773, 'timestamp': '2025-09-30 22:44:59.203140', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.260798', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.14819523692131042, 'timestamp': '2025-09-30 22:44:59.267389', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:59.329399', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.10695651918649673, 'timestamp': '2025-09-30 22:44:59.332533', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:59.391770', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.06970785558223724, 'timestamp': '2025-09-30 22:44:59.398553', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.466897', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.052073776721954346, 'timestamp': '2025-09-30 22:44:59.469867', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:59.526762', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.05744193121790886, 'timestamp': '2025-09-30 22:44:59.529685', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:44:59.586172', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.080722875893116, 'timestamp': '2025-09-30 22:44:59.589082', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:59.669622', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.03828103095293045, 'timestamp': '2025-09-30 22:44:59.675754', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.743067', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.044024717062711716, 'timestamp': '2025-09-30 22:44:59.745331', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.803562', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.05761146545410156, 'timestamp': '2025-09-30 22:44:59.805977', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:44:59.862202', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.07307536154985428, 'timestamp': '2025-09-30 22:44:59.864638', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:44:59.921330', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.06914890557527542, 'timestamp': '2025-09-30 22:44:59.928699', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:44:59.985724', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.048754915595054626, 'timestamp': '2025-09-30 22:44:59.987766', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:00.056760', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.09360290318727493, 'timestamp': '2025-09-30 22:45:00.059200', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.117198', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.10030350089073181, 'timestamp': '2025-09-30 22:45:00.119849', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:00.177457', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.1398153156042099, 'timestamp': '2025-09-30 22:45:00.186635', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.249616', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.13069979846477509, 'timestamp': '2025-09-30 22:45:00.254023', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:00.325990', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.06580191850662231, 'timestamp': '2025-09-30 22:45:00.331459', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.409764', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.11368448287248611, 'timestamp': '2025-09-30 22:45:00.414653', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.472969', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.17249654233455658, 'timestamp': '2025-09-30 22:45:00.482204', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:00.566574', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.07985412329435349, 'timestamp': '2025-09-30 22:45:00.570410', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:00.632319', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.07628229260444641, 'timestamp': '2025-09-30 22:45:00.638450', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.711739', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.050540585070848465, 'timestamp': '2025-09-30 22:45:00.715065', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:00.774709', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.03118649311363697, 'timestamp': '2025-09-30 22:45:00.783429', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.843963', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.057222217321395874, 'timestamp': '2025-09-30 22:45:00.848240', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.906689', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.11238829046487808, 'timestamp': '2025-09-30 22:45:00.911313', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:00.969188', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.01785161904990673, 'timestamp': '2025-09-30 22:45:00.971994', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:01.031278', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.10769177228212357, 'timestamp': '2025-09-30 22:45:01.039326', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:01.102271', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.11228705197572708, 'timestamp': '2025-09-30 22:45:01.107494', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:01.166968', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.04007517918944359, 'timestamp': '2025-09-30 22:45:01.175582', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:01.236851', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.06381528824567795, 'timestamp': '2025-09-30 22:45:01.239453', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:01.306060', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.14478743076324463, 'timestamp': '2025-09-30 22:45:01.312319', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:01.368956', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.05097430571913719, 'timestamp': '2025-09-30 22:45:01.371925', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:01.432703', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.05192740634083748, 'timestamp': '2025-09-30 22:45:01.434982', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:01.490653', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.06397392600774765, 'timestamp': '2025-09-30 22:45:01.494120', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:01.551183', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.06430234760046005, 'timestamp': '2025-09-30 22:45:01.557201', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:01.630176', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.10649225115776062, 'timestamp': '2025-09-30 22:45:01.633141', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:01.690765', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.08479059487581253, 'timestamp': '2025-09-30 22:45:01.693020', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:01.755614', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.09878069162368774, 'timestamp': '2025-09-30 22:45:01.758544', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:01.818926', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.07386292517185211, 'timestamp': '2025-09-30 22:45:01.826092', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:01.885251', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.08188824355602264, 'timestamp': '2025-09-30 22:45:01.893064', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:01.962410', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.09185394644737244, 'timestamp': '2025-09-30 22:45:01.969199', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:02.032200', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.08605702966451645, 'timestamp': '2025-09-30 22:45:02.038527', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:02.095712', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.08989188075065613, 'timestamp': '2025-09-30 22:45:02.102141', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:02.159797', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.1500745266675949, 'timestamp': '2025-09-30 22:45:02.162372', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:02.241528', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.12044691294431686, 'timestamp': '2025-09-30 22:45:02.243897', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:02.310169', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.084768146276474, 'timestamp': '2025-09-30 22:45:02.317353', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:02.388501', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.07307416945695877, 'timestamp': '2025-09-30 22:45:02.399745', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:02.460071', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.04909200966358185, 'timestamp': '2025-09-30 22:45:02.469572', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:02.542752', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.12796272337436676, 'timestamp': '2025-09-30 22:45:02.549299', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:02.634607', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.03202006220817566, 'timestamp': '2025-09-30 22:45:02.648085', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:02.729101', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.0402359701693058, 'timestamp': '2025-09-30 22:45:02.750141', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:02.818826', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.095526784658432, 'timestamp': '2025-09-30 22:45:02.836066', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:02.934591', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.07758793234825134, 'timestamp': '2025-09-30 22:45:02.953546', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:03.017412', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.12687304615974426, 'timestamp': '2025-09-30 22:45:03.021554', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:03.108399', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.06620972603559494, 'timestamp': '2025-09-30 22:45:03.115140', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:03.174778', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.06268151849508286, 'timestamp': '2025-09-30 22:45:03.178793', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.242358', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.1395028531551361, 'timestamp': '2025-09-30 22:45:03.247056', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.306507', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.06305938959121704, 'timestamp': '2025-09-30 22:45:03.309096', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.365832', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.05740618333220482, 'timestamp': '2025-09-30 22:45:03.372346', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:03.436275', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.07668084651231766, 'timestamp': '2025-09-30 22:45:03.438937', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:03.500958', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.16304324567317963, 'timestamp': '2025-09-30 22:45:03.506013', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.565085', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.08010110259056091, 'timestamp': '2025-09-30 22:45:03.571203', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:03.630298', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.09161566197872162, 'timestamp': '2025-09-30 22:45:03.639002', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:03.712062', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.038161374628543854, 'timestamp': '2025-09-30 22:45:03.719318', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.778585', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.08030296862125397, 'timestamp': '2025-09-30 22:45:03.782943', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:03.860165', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.05129633843898773, 'timestamp': '2025-09-30 22:45:03.862890', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:03.925468', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.025192203000187874, 'timestamp': '2025-09-30 22:45:03.936240', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.002332', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.06488729268312454, 'timestamp': '2025-09-30 22:45:04.006358', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.066293', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.1155417263507843, 'timestamp': '2025-09-30 22:45:04.068762', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.124836', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.08668942004442215, 'timestamp': '2025-09-30 22:45:04.130427', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.190146', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.15471743047237396, 'timestamp': '2025-09-30 22:45:04.198066', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:04.258169', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.0455324612557888, 'timestamp': '2025-09-30 22:45:04.261270', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.327865', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.17544522881507874, 'timestamp': '2025-09-30 22:45:04.341285', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.407531', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.10178980231285095, 'timestamp': '2025-09-30 22:45:04.417073', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.475546', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.05327393859624863, 'timestamp': '2025-09-30 22:45:04.482331', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.554527', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.09361621737480164, 'timestamp': '2025-09-30 22:45:04.560401', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:04.619533', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.0717366486787796, 'timestamp': '2025-09-30 22:45:04.625653', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:04.686999', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.055438388139009476, 'timestamp': '2025-09-30 22:45:04.700942', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.762347', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.085569366812706, 'timestamp': '2025-09-30 22:45:04.782184', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.841571', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.16242915391921997, 'timestamp': '2025-09-30 22:45:04.845751', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:04.904897', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.10726920515298843, 'timestamp': '2025-09-30 22:45:04.909675', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:04.970227', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.09633145481348038, 'timestamp': '2025-09-30 22:45:04.973321', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.038443', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.059996627271175385, 'timestamp': '2025-09-30 22:45:05.050350', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.114062', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.13059763610363007, 'timestamp': '2025-09-30 22:45:05.131364', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:05.198059', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.057649437338113785, 'timestamp': '2025-09-30 22:45:05.202281', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:05.261206', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.0767919272184372, 'timestamp': '2025-09-30 22:45:05.276311', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:05.351035', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.07953475415706635, 'timestamp': '2025-09-30 22:45:05.375465', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:05.436562', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.08151662349700928, 'timestamp': '2025-09-30 22:45:05.440003', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:05.498859', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.14204923808574677, 'timestamp': '2025-09-30 22:45:05.502329', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:05.568151', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.05158911645412445, 'timestamp': '2025-09-30 22:45:05.571269', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.630328', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.08305752277374268, 'timestamp': '2025-09-30 22:45:05.637783', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.697159', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.046643633395433426, 'timestamp': '2025-09-30 22:45:05.700908', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:05.761388', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.1037115752696991, 'timestamp': '2025-09-30 22:45:05.769446', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:05.840508', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.01960674673318863, 'timestamp': '2025-09-30 22:45:05.850979', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.921278', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.06222638860344887, 'timestamp': '2025-09-30 22:45:05.929109', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:05.998621', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.05540560930967331, 'timestamp': '2025-09-30 22:45:06.003242', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:06.075788', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.07489628344774246, 'timestamp': '2025-09-30 22:45:06.085302', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:06.158022', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.10017627477645874, 'timestamp': '2025-09-30 22:45:06.163233', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:06.229986', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.13736821711063385, 'timestamp': '2025-09-30 22:45:06.238833', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:06.304336', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.0665648877620697, 'timestamp': '2025-09-30 22:45:06.307629', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:06.374657', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.05108025670051575, 'timestamp': '2025-09-30 22:45:06.385414', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:06.448334', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.08743827790021896, 'timestamp': '2025-09-30 22:45:06.452642', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:06.528007', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.12720829248428345, 'timestamp': '2025-09-30 22:45:06.536778', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:06.621822', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.08705050498247147, 'timestamp': '2025-09-30 22:45:06.626028', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:06.691621', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.13161493837833405, 'timestamp': '2025-09-30 22:45:06.698357', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:06.774400', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.002891632029786706, 'timestamp': '2025-09-30 22:45:06.780256', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:06.856268', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.07491588592529297, 'timestamp': '2025-09-30 22:45:06.870792', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:06.949303', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.08333191275596619, 'timestamp': '2025-09-30 22:45:06.952737', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:07.029093', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.10226116329431534, 'timestamp': '2025-09-30 22:45:07.032336', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:07.101673', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.11249662935733795, 'timestamp': '2025-09-30 22:45:07.104543', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:07.194169', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.08746956288814545, 'timestamp': '2025-09-30 22:45:07.202677', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:07.286148', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.13109278678894043, 'timestamp': '2025-09-30 22:45:07.291170', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:07.372976', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.07576325535774231, 'timestamp': '2025-09-30 22:45:07.376730', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:07.442177', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.08381536602973938, 'timestamp': '2025-09-30 22:45:07.451002', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:07.564585', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.030290717259049416, 'timestamp': '2025-09-30 22:45:07.572097', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:07.655837', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.09493283927440643, 'timestamp': '2025-09-30 22:45:07.659656', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:07.731749', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.03858327493071556, 'timestamp': '2025-09-30 22:45:07.735152', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:07.837078', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.05828895419836044, 'timestamp': '2025-09-30 22:45:07.841346', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:07.932472', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.03375042602419853, 'timestamp': '2025-09-30 22:45:07.939689', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:08.011124', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.10918880254030228, 'timestamp': '2025-09-30 22:45:08.020747', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:08.093439', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.101590096950531, 'timestamp': '2025-09-30 22:45:08.096877', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:08.159913', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.09340731054544449, 'timestamp': '2025-09-30 22:45:08.163506', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:08.223505', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.04765789955854416, 'timestamp': '2025-09-30 22:45:08.232043', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:08.293214', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.08085620403289795, 'timestamp': '2025-09-30 22:45:08.297369', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:08.356307', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.06450298428535461, 'timestamp': '2025-09-30 22:45:08.359046', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:08.420076', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.08555284887552261, 'timestamp': '2025-09-30 22:45:08.423963', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:08.485896', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.054336369037628174, 'timestamp': '2025-09-30 22:45:08.494690', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:08.554012', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.0326971672475338, 'timestamp': '2025-09-30 22:45:08.557464', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:45:08.617077', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.14752094447612762, 'timestamp': '2025-09-30 22:45:08.621097', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:08.681511', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.06516878306865692, 'timestamp': '2025-09-30 22:45:08.685818', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:08.746370', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.07283380627632141, 'timestamp': '2025-09-30 22:45:08.754252', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:08.814404', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.04343711957335472, 'timestamp': '2025-09-30 22:45:08.817242', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:08.882828', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.028508009389042854, 'timestamp': '2025-09-30 22:45:08.886408', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:08.945834', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.10611630231142044, 'timestamp': '2025-09-30 22:45:08.949289', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:09.027535', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.13282914459705353, 'timestamp': '2025-09-30 22:45:09.040800', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:09.100838', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.09237274527549744, 'timestamp': '2025-09-30 22:45:09.104328', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:09.162518', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.16153761744499207, 'timestamp': '2025-09-30 22:45:09.165659', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:45:09.233676', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.11602691560983658, 'timestamp': '2025-09-30 22:45:09.236736', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:09.296462', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.11774307489395142, 'timestamp': '2025-09-30 22:45:09.303562', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:09.363124', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.07476312667131424, 'timestamp': '2025-09-30 22:45:09.366075', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:09.426970', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.11049844324588776, 'timestamp': '2025-09-30 22:45:09.430940', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:09.488222', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.13532309234142303, 'timestamp': '2025-09-30 22:45:09.491599', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:09.564565', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.10142471641302109, 'timestamp': '2025-09-30 22:45:09.571296', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:09.628721', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.06271771341562271, 'timestamp': '2025-09-30 22:45:09.631949', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:09.709567', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.09556282311677933, 'timestamp': '2025-09-30 22:45:09.713274', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:09.776757', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.08620435744524002, 'timestamp': '2025-09-30 22:45:09.781867', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:09.860138', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.09838773310184479, 'timestamp': '2025-09-30 22:45:09.868211', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:09.934105', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.08405988663434982, 'timestamp': '2025-09-30 22:45:09.936737', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:09.996117', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.09215929359197617, 'timestamp': '2025-09-30 22:45:09.999877', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.059368', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.03608974441885948, 'timestamp': '2025-09-30 22:45:10.062225', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:10.128275', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.07035749405622482, 'timestamp': '2025-09-30 22:45:10.142160', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:10.204457', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.07647380232810974, 'timestamp': '2025-09-30 22:45:10.214945', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.278780', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.07611729949712753, 'timestamp': '2025-09-30 22:45:10.282053', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.340365', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.2244240790605545, 'timestamp': '2025-09-30 22:45:10.348899', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:10.411080', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.17997528612613678, 'timestamp': '2025-09-30 22:45:10.417695', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.487841', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.22542577981948853, 'timestamp': '2025-09-30 22:45:10.490641', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:10.549885', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.10156843066215515, 'timestamp': '2025-09-30 22:45:10.553449', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:10.624797', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.0699349194765091, 'timestamp': '2025-09-30 22:45:10.633530', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:10.696758', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.0474846251308918, 'timestamp': '2025-09-30 22:45:10.703007', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.789847', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.1011403501033783, 'timestamp': '2025-09-30 22:45:10.793261', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:10.852789', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.09591921418905258, 'timestamp': '2025-09-30 22:45:10.861713', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:10.919977', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.08314774185419083, 'timestamp': '2025-09-30 22:45:10.923766', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:10.985672', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.045663490891456604, 'timestamp': '2025-09-30 22:45:10.997068', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:11.066833', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.061274901032447815, 'timestamp': '2025-09-30 22:45:11.075667', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:11.142968', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.023799926042556763, 'timestamp': '2025-09-30 22:45:11.147206', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:11.206974', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.10427620261907578, 'timestamp': '2025-09-30 22:45:11.212267', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:45:11.273878', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.056489959359169006, 'timestamp': '2025-09-30 22:45:11.280505', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:11.343383', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.10420779883861542, 'timestamp': '2025-09-30 22:45:11.352069', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:11.416121', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.26351800560951233, 'timestamp': '2025-09-30 22:45:11.428335', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:11.499846', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.069978266954422, 'timestamp': '2025-09-30 22:45:11.504745', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:11.604510', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.11042595654726028, 'timestamp': '2025-09-30 22:45:11.622162', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:11.684079', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.15006136894226074, 'timestamp': '2025-09-30 22:45:11.697502', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:11.776606', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.1160217747092247, 'timestamp': '2025-09-30 22:45:11.779913', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:11.856101', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.06719234585762024, 'timestamp': '2025-09-30 22:45:11.859607', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:11.922025', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.058636777102947235, 'timestamp': '2025-09-30 22:45:11.930444', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:11.999961', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.008929068222641945, 'timestamp': '2025-09-30 22:45:12.004098', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:12.062657', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.05652661621570587, 'timestamp': '2025-09-30 22:45:12.066548', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:12.126977', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.059012558311223984, 'timestamp': '2025-09-30 22:45:12.139586', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:12.218893', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.02424527145922184, 'timestamp': '2025-09-30 22:45:12.228153', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:12.287024', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.09500281512737274, 'timestamp': '2025-09-30 22:45:12.292516', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:12.353967', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.05822603031992912, 'timestamp': '2025-09-30 22:45:12.357717', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:12.416200', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.10784120112657547, 'timestamp': '2025-09-30 22:45:12.421406', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:12.480368', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.15053750574588776, 'timestamp': '2025-09-30 22:45:12.487075', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-09-30 22:45:12.901476', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:12.963665', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.03401429206132889, 'timestamp': '2025-09-30 22:45:12.966033', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:13.030310', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.05576259642839432, 'timestamp': '2025-09-30 22:45:13.032539', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:13.096798', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.08602788299322128, 'timestamp': '2025-09-30 22:45:13.099225', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:13.155312', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.06547142565250397, 'timestamp': '2025-09-30 22:45:13.162530', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:45:29.051341', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 8093.344306527871, 'timestamp': '2025-09-30 22:45:29.056280', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:29.144813', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.12642042338848114, 'timestamp': '2025-09-30 22:45:29.148850', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:29.213753', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.018224507570266724, 'timestamp': '2025-09-30 22:45:29.220481', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:29.300975', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.02813361957669258, 'timestamp': '2025-09-30 22:45:29.304229', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:29.367671', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.06259357929229736, 'timestamp': '2025-09-30 22:45:29.375958', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:29.440820', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.08212685585021973, 'timestamp': '2025-09-30 22:45:29.444052', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:29.527393', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.06697158515453339, 'timestamp': '2025-09-30 22:45:29.537904', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:29.626702', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.09254112094640732, 'timestamp': '2025-09-30 22:45:29.640080', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:29.748828', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.037791658192873, 'timestamp': '2025-09-30 22:45:29.757032', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:29.843836', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.08846750855445862, 'timestamp': '2025-09-30 22:45:29.851089', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:29.938340', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.09670432657003403, 'timestamp': '2025-09-30 22:45:29.947934', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:30.010516', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.07951759546995163, 'timestamp': '2025-09-30 22:45:30.018632', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:30.091576', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.08623765408992767, 'timestamp': '2025-09-30 22:45:30.099940', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.158290', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.15137355029582977, 'timestamp': '2025-09-30 22:45:30.161632', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.220960', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.09658212214708328, 'timestamp': '2025-09-30 22:45:30.232383', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:30.291366', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.10985735803842545, 'timestamp': '2025-09-30 22:45:30.294760', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:30.358182', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.08043711632490158, 'timestamp': '2025-09-30 22:45:30.373448', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.442111', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.10318385064601898, 'timestamp': '2025-09-30 22:45:30.454628', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:30.513362', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.10799540579319, 'timestamp': '2025-09-30 22:45:30.517830', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.576343', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.11370903998613358, 'timestamp': '2025-09-30 22:45:30.579378', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.647064', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.07465467602014542, 'timestamp': '2025-09-30 22:45:30.653754', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.711285', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.097622349858284, 'timestamp': '2025-09-30 22:45:30.713912', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:30.774797', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.04770539328455925, 'timestamp': '2025-09-30 22:45:30.777371', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:30.836439', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.07428326457738876, 'timestamp': '2025-09-30 22:45:30.840261', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:30.902833', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.11699977517127991, 'timestamp': '2025-09-30 22:45:30.909672', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:30.969891', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.05103151500225067, 'timestamp': '2025-09-30 22:45:30.973485', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:31.040648', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.03949211910367012, 'timestamp': '2025-09-30 22:45:31.048855', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.112512', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.07547968626022339, 'timestamp': '2025-09-30 22:45:31.115150', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:31.183800', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.03935275226831436, 'timestamp': '2025-09-30 22:45:31.191392', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:31.251195', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.0912962406873703, 'timestamp': '2025-09-30 22:45:31.255768', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.316636', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.09316272288560867, 'timestamp': '2025-09-30 22:45:31.320753', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.380300', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.07935328781604767, 'timestamp': '2025-09-30 22:45:31.391573', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:31.458308', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.051302164793014526, 'timestamp': '2025-09-30 22:45:31.480370', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.539280', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.09992518275976181, 'timestamp': '2025-09-30 22:45:31.542347', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.604034', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.11086924374103546, 'timestamp': '2025-09-30 22:45:31.610050', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:31.683775', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.07158100605010986, 'timestamp': '2025-09-30 22:45:31.695416', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:31.765682', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.10948704183101654, 'timestamp': '2025-09-30 22:45:31.773364', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:31.830782', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.1222241073846817, 'timestamp': '2025-09-30 22:45:31.835201', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:31.909371', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.09914538264274597, 'timestamp': '2025-09-30 22:45:31.914930', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:31.990868', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.07311264425516129, 'timestamp': '2025-09-30 22:45:31.996107', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:32.056146', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.12139846384525299, 'timestamp': '2025-09-30 22:45:32.075949', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.134404', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.11605992913246155, 'timestamp': '2025-09-30 22:45:32.140304', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.203198', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.1551746279001236, 'timestamp': '2025-09-30 22:45:32.207473', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.267084', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.16323943436145782, 'timestamp': '2025-09-30 22:45:32.271641', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.343474', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.12569893896579742, 'timestamp': '2025-09-30 22:45:32.351865', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.412179', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.05629498139023781, 'timestamp': '2025-09-30 22:45:32.422759', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.485524', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.10111161321401596, 'timestamp': '2025-09-30 22:45:32.488075', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:32.555099', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.10922800749540329, 'timestamp': '2025-09-30 22:45:32.566160', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:32.631961', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.07951938360929489, 'timestamp': '2025-09-30 22:45:32.650873', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:32.715132', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.14284102618694305, 'timestamp': '2025-09-30 22:45:32.729542', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:32.797755', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.07243511080741882, 'timestamp': '2025-09-30 22:45:32.800782', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:32.879994', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.12040087580680847, 'timestamp': '2025-09-30 22:45:32.884157', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:32.957167', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.10096333175897598, 'timestamp': '2025-09-30 22:45:32.974755', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:33.060809', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.06932458281517029, 'timestamp': '2025-09-30 22:45:33.066181', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:33.133480', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.048674870282411575, 'timestamp': '2025-09-30 22:45:33.146985', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:33.205124', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.12081287801265717, 'timestamp': '2025-09-30 22:45:33.217329', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:33.285701', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.06153298169374466, 'timestamp': '2025-09-30 22:45:33.306420', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:33.366413', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.06360015273094177, 'timestamp': '2025-09-30 22:45:33.381570', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:33.450705', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.18854805827140808, 'timestamp': '2025-09-30 22:45:33.454091', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:33.517966', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.0922631323337555, 'timestamp': '2025-09-30 22:45:33.522066', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:33.588517', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.12459559738636017, 'timestamp': '2025-09-30 22:45:33.608461', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:33.685563', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.10281417518854141, 'timestamp': '2025-09-30 22:45:33.705506', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:33.764757', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.1019233837723732, 'timestamp': '2025-09-30 22:45:33.779416', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:33.838918', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.04339022934436798, 'timestamp': '2025-09-30 22:45:33.850148', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:33.918015', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.12014470994472504, 'timestamp': '2025-09-30 22:45:33.924924', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:34.001722', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.08887212723493576, 'timestamp': '2025-09-30 22:45:34.013793', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:34.080338', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.08418498933315277, 'timestamp': '2025-09-30 22:45:34.094641', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:34.167968', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.05695091933012009, 'timestamp': '2025-09-30 22:45:34.182376', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:34.254805', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.03283365070819855, 'timestamp': '2025-09-30 22:45:34.271970', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:34.348743', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.06029927730560303, 'timestamp': '2025-09-30 22:45:34.364073', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:34.452656', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.11725304275751114, 'timestamp': '2025-09-30 22:45:34.466103', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:34.528716', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.1129022166132927, 'timestamp': '2025-09-30 22:45:34.540115', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:34.610240', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.1322033852338791, 'timestamp': '2025-09-30 22:45:34.624880', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:34.685131', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.07545242458581924, 'timestamp': '2025-09-30 22:45:34.698013', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:34.765723', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.21954412758350372, 'timestamp': '2025-09-30 22:45:34.770944', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:34.838869', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.04752510040998459, 'timestamp': '2025-09-30 22:45:34.844112', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:34.918415', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.06680157035589218, 'timestamp': '2025-09-30 22:45:34.936012', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:35.000201', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.09323228150606155, 'timestamp': '2025-09-30 22:45:35.012996', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:35.071601', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.09333081543445587, 'timestamp': '2025-09-30 22:45:35.075321', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:35.133504', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.03781838342547417, 'timestamp': '2025-09-30 22:45:35.145072', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:35.203910', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.05223413556814194, 'timestamp': '2025-09-30 22:45:35.212074', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:35.278744', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.0402582548558712, 'timestamp': '2025-09-30 22:45:35.284519', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:35.356641', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.03470691293478012, 'timestamp': '2025-09-30 22:45:35.362458', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:35.422303', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.049612682312726974, 'timestamp': '2025-09-30 22:45:35.442520', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:35.515793', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.07626568526029587, 'timestamp': '2025-09-30 22:45:35.540582', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:45:35.599656', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.03184351697564125, 'timestamp': '2025-09-30 22:45:35.623365', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:35.681225', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.05243216082453728, 'timestamp': '2025-09-30 22:45:35.686690', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:35.746892', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.10165122151374817, 'timestamp': '2025-09-30 22:45:35.774914', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:35.834446', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.11252284795045853, 'timestamp': '2025-09-30 22:45:35.862428', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:35.920992', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.15823079645633698, 'timestamp': '2025-09-30 22:45:35.928962', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:36.001595', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.056645724922418594, 'timestamp': '2025-09-30 22:45:36.022251', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:36.105851', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.06470578163862228, 'timestamp': '2025-09-30 22:45:36.127189', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:36.195770', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.11108444631099701, 'timestamp': '2025-09-30 22:45:36.219011', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.285948', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.06007778272032738, 'timestamp': '2025-09-30 22:45:36.292090', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.363759', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.04777875915169716, 'timestamp': '2025-09-30 22:45:36.368364', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:36.435978', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.03684089705348015, 'timestamp': '2025-09-30 22:45:36.453734', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.533813', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.1166626587510109, 'timestamp': '2025-09-30 22:45:36.544122', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:36.603633', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.09789624810218811, 'timestamp': '2025-09-30 22:45:36.623444', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.698484', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.09608346968889236, 'timestamp': '2025-09-30 22:45:36.704495', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:36.763124', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.08552007377147675, 'timestamp': '2025-09-30 22:45:36.766929', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.826540', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.177425354719162, 'timestamp': '2025-09-30 22:45:36.847588', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:36.908106', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.07689079642295837, 'timestamp': '2025-09-30 22:45:36.927402', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:37.000673', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.1107301414012909, 'timestamp': '2025-09-30 22:45:37.019614', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:37.077595', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.113826684653759, 'timestamp': '2025-09-30 22:45:37.083442', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:37.142134', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.10604503750801086, 'timestamp': '2025-09-30 22:45:37.165898', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:37.226467', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.11166106164455414, 'timestamp': '2025-09-30 22:45:37.249451', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:37.326053', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.13717564940452576, 'timestamp': '2025-09-30 22:45:37.345883', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:37.404985', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.08269374072551727, 'timestamp': '2025-09-30 22:45:37.416986', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:37.501294', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.07734508812427521, 'timestamp': '2025-09-30 22:45:37.513952', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:37.584163', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.056031253188848495, 'timestamp': '2025-09-30 22:45:37.589266', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:37.650821', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.1457212269306183, 'timestamp': '2025-09-30 22:45:37.658547', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:37.721235', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.11115724593400955, 'timestamp': '2025-09-30 22:45:37.728139', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:37.802421', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.14455102384090424, 'timestamp': '2025-09-30 22:45:37.827627', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:37.901801', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.059220463037490845, 'timestamp': '2025-09-30 22:45:37.923433', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:38.018353', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.10657449811697006, 'timestamp': '2025-09-30 22:45:38.032479', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:38.104621', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.05209306254982948, 'timestamp': '2025-09-30 22:45:38.107757', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:38.175784', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.05737817659974098, 'timestamp': '2025-09-30 22:45:38.184116', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:38.253231', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.12519825994968414, 'timestamp': '2025-09-30 22:45:38.262930', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:38.338633', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.0414617657661438, 'timestamp': '2025-09-30 22:45:38.342314', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:38.406127', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.09544242173433304, 'timestamp': '2025-09-30 22:45:38.419460', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:38.489731', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.06593070179224014, 'timestamp': '2025-09-30 22:45:38.497105', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:38.557741', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.07130946964025497, 'timestamp': '2025-09-30 22:45:38.561992', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:38.626563', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.06783711165189743, 'timestamp': '2025-09-30 22:45:38.641035', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:38.705809', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.09072267264127731, 'timestamp': '2025-09-30 22:45:38.709161', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:38.771384', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.05856500566005707, 'timestamp': '2025-09-30 22:45:38.783532', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:38.854650', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.0625206008553505, 'timestamp': '2025-09-30 22:45:38.859568', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:38.940437', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.08346013724803925, 'timestamp': '2025-09-30 22:45:38.944210', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:39.009340', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.09099599719047546, 'timestamp': '2025-09-30 22:45:39.012987', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:39.079212', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.029612332582473755, 'timestamp': '2025-09-30 22:45:39.091350', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:39.165308', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.06631887704133987, 'timestamp': '2025-09-30 22:45:39.172984', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:39.234847', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.0958145260810852, 'timestamp': '2025-09-30 22:45:39.237950', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:39.303395', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.057727500796318054, 'timestamp': '2025-09-30 22:45:39.315850', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:39.382125', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.05670025944709778, 'timestamp': '2025-09-30 22:45:39.388865', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:39.455311', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.08378112316131592, 'timestamp': '2025-09-30 22:45:39.469734', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:39.543445', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.061233773827552795, 'timestamp': '2025-09-30 22:45:39.553905', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:39.614130', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.0652870163321495, 'timestamp': '2025-09-30 22:45:39.622825', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:39.694745', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.10740088671445847, 'timestamp': '2025-09-30 22:45:39.706581', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:39.789270', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.042718105018138885, 'timestamp': '2025-09-30 22:45:39.792957', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:39.856379', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.09538412094116211, 'timestamp': '2025-09-30 22:45:39.864528', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:39.937826', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.08993063867092133, 'timestamp': '2025-09-30 22:45:39.946141', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:40.007536', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.03216201812028885, 'timestamp': '2025-09-30 22:45:40.018915', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:40.088690', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.061852674931287766, 'timestamp': '2025-09-30 22:45:40.092038', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:40.171178', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.133575901389122, 'timestamp': '2025-09-30 22:45:40.181527', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:40.245972', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.07746123522520065, 'timestamp': '2025-09-30 22:45:40.250753', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:40.308637', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.04310040548443794, 'timestamp': '2025-09-30 22:45:40.321648', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:40.391477', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.0712505578994751, 'timestamp': '2025-09-30 22:45:40.401284', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:40.471482', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.10261445492506027, 'timestamp': '2025-09-30 22:45:40.477274', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:40.543245', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.08657071739435196, 'timestamp': '2025-09-30 22:45:40.547572', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:40.609881', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.05088573694229126, 'timestamp': '2025-09-30 22:45:40.621387', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:40.688471', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.05816822871565819, 'timestamp': '2025-09-30 22:45:40.697923', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:40.780884', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.06741397082805634, 'timestamp': '2025-09-30 22:45:40.790420', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:40.866824', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.049017056822776794, 'timestamp': '2025-09-30 22:45:40.869436', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:40.935532', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.11101523786783218, 'timestamp': '2025-09-30 22:45:40.950236', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:41.030428', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.09807997196912766, 'timestamp': '2025-09-30 22:45:41.034049', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:41.124130', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.09829804301261902, 'timestamp': '2025-09-30 22:45:41.127870', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.188074', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.09411162883043289, 'timestamp': '2025-09-30 22:45:41.197477', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:41.264119', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.10664399713277817, 'timestamp': '2025-09-30 22:45:41.274365', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.342194', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.062380071729421616, 'timestamp': '2025-09-30 22:45:41.354549', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:41.420389', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.0630016028881073, 'timestamp': '2025-09-30 22:45:41.425072', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.492300', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.056598007678985596, 'timestamp': '2025-09-30 22:45:41.501444', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:41.568271', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.06586393713951111, 'timestamp': '2025-09-30 22:45:41.581462', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.659724', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.08650711178779602, 'timestamp': '2025-09-30 22:45:41.670398', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.735539', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.11442168802022934, 'timestamp': '2025-09-30 22:45:41.739352', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.797777', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.07164022326469421, 'timestamp': '2025-09-30 22:45:41.804392', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:41.876276', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.1057397797703743, 'timestamp': '2025-09-30 22:45:41.892089', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:41.964042', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.11510039865970612, 'timestamp': '2025-09-30 22:45:41.969405', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.036818', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.08981180191040039, 'timestamp': '2025-09-30 22:45:42.045312', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.110413', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.07242923974990845, 'timestamp': '2025-09-30 22:45:42.113543', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.188631', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.025818999856710434, 'timestamp': '2025-09-30 22:45:42.200979', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:42.276963', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.03404522314667702, 'timestamp': '2025-09-30 22:45:42.280828', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:42.343523', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.07617180049419403, 'timestamp': '2025-09-30 22:45:42.351754', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:42.412075', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.0829097107052803, 'timestamp': '2025-09-30 22:45:42.424605', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.504634', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.055856406688690186, 'timestamp': '2025-09-30 22:45:42.522975', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:42.585978', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.13876956701278687, 'timestamp': '2025-09-30 22:45:42.594146', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:42.663927', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.16095447540283203, 'timestamp': '2025-09-30 22:45:42.666761', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:42.745761', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.06788703799247742, 'timestamp': '2025-09-30 22:45:42.749996', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.819830', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.06815426796674728, 'timestamp': '2025-09-30 22:45:42.826096', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:42.885994', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.07339280098676682, 'timestamp': '2025-09-30 22:45:42.895556', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:42.972930', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.08036946505308151, 'timestamp': '2025-09-30 22:45:42.983632', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:43.055699', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.024486735463142395, 'timestamp': '2025-09-30 22:45:43.059823', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:43.117384', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.07126588374376297, 'timestamp': '2025-09-30 22:45:43.123403', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:43.188554', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.14152082800865173, 'timestamp': '2025-09-30 22:45:43.196463', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:43.260512', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.13952839374542236, 'timestamp': '2025-09-30 22:45:43.268212', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:43.330274', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.09770846366882324, 'timestamp': '2025-09-30 22:45:43.334414', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:43.397571', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.12988276779651642, 'timestamp': '2025-09-30 22:45:43.408598', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:43.482407', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.06942126154899597, 'timestamp': '2025-09-30 22:45:43.490646', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:43.553966', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.03604579716920853, 'timestamp': '2025-09-30 22:45:43.556921', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:43.633962', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.16015830636024475, 'timestamp': '2025-09-30 22:45:43.641566', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:43.701218', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.19277256727218628, 'timestamp': '2025-09-30 22:45:43.711330', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:43.768853', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.08187907934188843, 'timestamp': '2025-09-30 22:45:43.778746', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:43.854290', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.044401880353689194, 'timestamp': '2025-09-30 22:45:43.863454', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:43.939559', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.08881825953722, 'timestamp': '2025-09-30 22:45:43.942806', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:44.010633', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.07433818280696869, 'timestamp': '2025-09-30 22:45:44.023307', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:44.082580', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.08834873884916306, 'timestamp': '2025-09-30 22:45:44.090763', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:44.153868', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.10599980503320694, 'timestamp': '2025-09-30 22:45:44.157172', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:44.233833', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.10862219333648682, 'timestamp': '2025-09-30 22:45:44.237439', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:44.305909', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.10030807554721832, 'timestamp': '2025-09-30 22:45:44.313708', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:44.375386', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.10236072540283203, 'timestamp': '2025-09-30 22:45:44.378747', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:44.437245', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.055526651442050934, 'timestamp': '2025-09-30 22:45:44.444001', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:44.507137', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.12098466604948044, 'timestamp': '2025-09-30 22:45:44.516107', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:44.586822', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.06857625395059586, 'timestamp': '2025-09-30 22:45:44.593159', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:44.655320', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.13796882331371307, 'timestamp': '2025-09-30 22:45:44.658018', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:44.732448', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.12951916456222534, 'timestamp': '2025-09-30 22:45:44.735232', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:44.802398', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.10491643846035004, 'timestamp': '2025-09-30 22:45:44.806783', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:44.875250', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.05794251337647438, 'timestamp': '2025-09-30 22:45:44.881865', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:44.946498', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.08332149684429169, 'timestamp': '2025-09-30 22:45:44.960932', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.019836', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.10027002543210983, 'timestamp': '2025-09-30 22:45:45.022525', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.080482', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.10820673406124115, 'timestamp': '2025-09-30 22:45:45.084037', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.149124', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.061459098011255264, 'timestamp': '2025-09-30 22:45:45.156026', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:45.212771', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.05877281725406647, 'timestamp': '2025-09-30 22:45:45.216481', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.275309', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.07080375403165817, 'timestamp': '2025-09-30 22:45:45.280281', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.338161', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.06732911616563797, 'timestamp': '2025-09-30 22:45:45.341369', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:45.406192', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.10778981447219849, 'timestamp': '2025-09-30 22:45:45.413034', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.471784', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.08008608222007751, 'timestamp': '2025-09-30 22:45:45.474662', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.533824', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.023169005289673805, 'timestamp': '2025-09-30 22:45:45.536610', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.594807', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.06095273047685623, 'timestamp': '2025-09-30 22:45:45.597449', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:45.656597', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.05211158096790314, 'timestamp': '2025-09-30 22:45:45.662832', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.721996', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.16953323781490326, 'timestamp': '2025-09-30 22:45:45.724957', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:45.784195', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.14643286168575287, 'timestamp': '2025-09-30 22:45:45.788174', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:45.856669', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.006415246520191431, 'timestamp': '2025-09-30 22:45:45.859467', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.917346', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.04583031311631203, 'timestamp': '2025-09-30 22:45:45.923995', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:45.985817', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.059683069586753845, 'timestamp': '2025-09-30 22:45:46.004212', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:46.077760', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.14450322091579437, 'timestamp': '2025-09-30 22:45:46.088757', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:46.149165', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.08099951595067978, 'timestamp': '2025-09-30 22:45:46.159295', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:46.218366', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.0864667221903801, 'timestamp': '2025-09-30 22:45:46.226456', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:46.291456', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.09793789684772491, 'timestamp': '2025-09-30 22:45:46.296954', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:46.362483', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.054334770888090134, 'timestamp': '2025-09-30 22:45:46.370515', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:46.439385', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.03447062149643898, 'timestamp': '2025-09-30 22:45:46.443211', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:46.502018', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.03380999714136124, 'timestamp': '2025-09-30 22:45:46.518585', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:46.595945', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.10541843622922897, 'timestamp': '2025-09-30 22:45:46.598662', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:46.657859', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.08915628492832184, 'timestamp': '2025-09-30 22:45:46.668293', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:46.733647', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.11153162270784378, 'timestamp': '2025-09-30 22:45:46.743129', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:46.808255', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.0839296504855156, 'timestamp': '2025-09-30 22:45:46.815915', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:46.873467', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.05393577367067337, 'timestamp': '2025-09-30 22:45:46.876779', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:46.975485', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.07378771901130676, 'timestamp': '2025-09-30 22:45:46.981237', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:47.040917', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.025526097044348717, 'timestamp': '2025-09-30 22:45:47.047319', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:47.115428', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.1359012871980667, 'timestamp': '2025-09-30 22:45:47.140066', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:47.210888', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.07056386768817902, 'timestamp': '2025-09-30 22:45:47.215590', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:47.275526', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.17178472876548767, 'timestamp': '2025-09-30 22:45:47.281494', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:47.343104', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.06837392598390579, 'timestamp': '2025-09-30 22:45:47.348646', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:47.408097', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.10087655484676361, 'timestamp': '2025-09-30 22:45:47.416393', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:47.491227', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.07350354641675949, 'timestamp': '2025-09-30 22:45:47.494499', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:47.556843', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.13861697912216187, 'timestamp': '2025-09-30 22:45:47.560747', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:47.627308', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.07526696473360062, 'timestamp': '2025-09-30 22:45:47.631247', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:47.688048', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.027313465252518654, 'timestamp': '2025-09-30 22:45:47.694931', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:47.753181', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.046746302396059036, 'timestamp': '2025-09-30 22:45:47.756180', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:47.815751', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.0898226872086525, 'timestamp': '2025-09-30 22:45:47.818462', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:47.878656', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.11781758069992065, 'timestamp': '2025-09-30 22:45:47.882144', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:47.943531', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.05076690763235092, 'timestamp': '2025-09-30 22:45:47.950387', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:45:48.034620', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.1203814446926117, 'timestamp': '2025-09-30 22:45:48.039939', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.099351', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.0794282779097557, 'timestamp': '2025-09-30 22:45:48.102736', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.165444', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.04173190891742706, 'timestamp': '2025-09-30 22:45:48.171245', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:48.232106', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.10975474119186401, 'timestamp': '2025-09-30 22:45:48.241099', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.313495', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.08563373982906342, 'timestamp': '2025-09-30 22:45:48.318351', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.378688', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.08266276866197586, 'timestamp': '2025-09-30 22:45:48.384050', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.444468', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.04498676583170891, 'timestamp': '2025-09-30 22:45:48.450025', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:48.510064', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.09118812531232834, 'timestamp': '2025-09-30 22:45:48.518231', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:48.591102', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.05536980181932449, 'timestamp': '2025-09-30 22:45:48.608017', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.669750', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.12483064085245132, 'timestamp': '2025-09-30 22:45:48.674336', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:48.737332', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.03123689815402031, 'timestamp': '2025-09-30 22:45:48.741729', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:48.801810', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.08117332309484482, 'timestamp': '2025-09-30 22:45:48.810869', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:48.881515', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.10369085520505905, 'timestamp': '2025-09-30 22:45:48.885449', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:48.944943', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.1326919049024582, 'timestamp': '2025-09-30 22:45:48.948580', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:49.024187', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.12002439796924591, 'timestamp': '2025-09-30 22:45:49.029639', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:49.090743', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.05530199408531189, 'timestamp': '2025-09-30 22:45:49.115890', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:49.175829', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.06808559596538544, 'timestamp': '2025-09-30 22:45:49.182039', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:49.257900', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.1446862816810608, 'timestamp': '2025-09-30 22:45:49.262984', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:49.336384', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.09541020542383194, 'timestamp': '2025-09-30 22:45:49.352992', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:49.412745', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.07618074119091034, 'timestamp': '2025-09-30 22:45:49.420767', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:49.480968', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.090943343937397, 'timestamp': '2025-09-30 22:45:49.498069', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:49.557129', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.06207648292183876, 'timestamp': '2025-09-30 22:45:49.569557', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:49.643473', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.12336888909339905, 'timestamp': '2025-09-30 22:45:49.647213', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:49.714523', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.040213681757450104, 'timestamp': '2025-09-30 22:45:49.721612', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:49.799082', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.07650953531265259, 'timestamp': '2025-09-30 22:45:49.814747', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:49.888030', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.0804082453250885, 'timestamp': '2025-09-30 22:45:49.891804', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:49.961854', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.07968778908252716, 'timestamp': '2025-09-30 22:45:49.965929', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:50.053567', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.16970963776111603, 'timestamp': '2025-09-30 22:45:50.075246', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:50.145358', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.10420739650726318, 'timestamp': '2025-09-30 22:45:50.154705', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:50.224446', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.10474422574043274, 'timestamp': '2025-09-30 22:45:50.229390', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:50.291357', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.06188057363033295, 'timestamp': '2025-09-30 22:45:50.295978', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:50.366375', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.03639460727572441, 'timestamp': '2025-09-30 22:45:50.382666', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:50.441955', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.10476431995630264, 'timestamp': '2025-09-30 22:45:50.447273', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:50.515204', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.07512965053319931, 'timestamp': '2025-09-30 22:45:50.520469', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:50.580350', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.07124015688896179, 'timestamp': '2025-09-30 22:45:50.596484', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:50.657803', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.12424418330192566, 'timestamp': '2025-09-30 22:45:50.676037', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:50.734551', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.0795830711722374, 'timestamp': '2025-09-30 22:45:50.737798', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:50.807116', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.17344585061073303, 'timestamp': '2025-09-30 22:45:50.811725', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:50.881470', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.11763426661491394, 'timestamp': '2025-09-30 22:45:50.886250', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:50.967579', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.08225035667419434, 'timestamp': '2025-09-30 22:45:50.987987', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:51.084895', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.08594605326652527, 'timestamp': '2025-09-30 22:45:51.088842', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:51.155878', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.03349024057388306, 'timestamp': '2025-09-30 22:45:51.168293', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:51.248437', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.10067825019359589, 'timestamp': '2025-09-30 22:45:51.260655', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:51.344382', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.0397174134850502, 'timestamp': '2025-09-30 22:45:51.359338', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:51.423603', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.06569115072488785, 'timestamp': '2025-09-30 22:45:51.427748', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:51.488453', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.050188127905130386, 'timestamp': '2025-09-30 22:45:51.501444', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:51.569002', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.10510452091693878, 'timestamp': '2025-09-30 22:45:51.572437', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:51.629960', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.09869127720594406, 'timestamp': '2025-09-30 22:45:51.646610', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:51.708704', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.14944849908351898, 'timestamp': '2025-09-30 22:45:51.713882', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:51.776280', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.1021837443113327, 'timestamp': '2025-09-30 22:45:51.783868', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:51.844835', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.08433482050895691, 'timestamp': '2025-09-30 22:45:51.853494', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:51.920416', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.06758453696966171, 'timestamp': '2025-09-30 22:45:51.941355', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:52.004172', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.040190692991018295, 'timestamp': '2025-09-30 22:45:52.009243', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:52.074969', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.06416948139667511, 'timestamp': '2025-09-30 22:45:52.078777', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:52.139437', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.062449026852846146, 'timestamp': '2025-09-30 22:45:52.143571', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:52.203866', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.1220705583691597, 'timestamp': '2025-09-30 22:45:52.211669', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:52.269050', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.19074174761772156, 'timestamp': '2025-09-30 22:45:52.273259', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.332347', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.07738325744867325, 'timestamp': '2025-09-30 22:45:52.335892', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:52.401629', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.1348138153553009, 'timestamp': '2025-09-30 22:45:52.418959', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.502413', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.05728977173566818, 'timestamp': '2025-09-30 22:45:52.511393', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.570498', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.07756196707487106, 'timestamp': '2025-09-30 22:45:52.575979', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.649546', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.042187485843896866, 'timestamp': '2025-09-30 22:45:52.658412', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.719163', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.12323051691055298, 'timestamp': '2025-09-30 22:45:52.727140', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:52.802530', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.09615237265825272, 'timestamp': '2025-09-30 22:45:52.811840', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:52.871485', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.10561817139387131, 'timestamp': '2025-09-30 22:45:52.876749', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:52.936284', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.06917097419500351, 'timestamp': '2025-09-30 22:45:52.943428', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:53.004977', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.03714150935411453, 'timestamp': '2025-09-30 22:45:53.008644', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:53.069260', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.11680693179368973, 'timestamp': '2025-09-30 22:45:53.075917', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.134903', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.0747227892279625, 'timestamp': '2025-09-30 22:45:53.137842', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.195750', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.038921765983104706, 'timestamp': '2025-09-30 22:45:53.201002', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.262191', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.04872254654765129, 'timestamp': '2025-09-30 22:45:53.267491', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:53.354347', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.23084595799446106, 'timestamp': '2025-09-30 22:45:53.361749', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:53.418752', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.13548167049884796, 'timestamp': '2025-09-30 22:45:53.422220', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.494313', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.11873134225606918, 'timestamp': '2025-09-30 22:45:53.499844', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.557818', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.08507955819368362, 'timestamp': '2025-09-30 22:45:53.562315', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:53.621249', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.11249466240406036, 'timestamp': '2025-09-30 22:45:53.628836', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:53.689757', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.07994665205478668, 'timestamp': '2025-09-30 22:45:53.695768', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:53.754280', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.09775030612945557, 'timestamp': '2025-09-30 22:45:53.759291', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:53.818644', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.07059668004512787, 'timestamp': '2025-09-30 22:45:53.831612', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:53.905719', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.09890662133693695, 'timestamp': '2025-09-30 22:45:53.913945', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:53.982796', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.07811322063207626, 'timestamp': '2025-09-30 22:45:53.987967', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.046076', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.17645835876464844, 'timestamp': '2025-09-30 22:45:54.052735', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:54.112712', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.1274585872888565, 'timestamp': '2025-09-30 22:45:54.116539', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:54.178589', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.09667501598596573, 'timestamp': '2025-09-30 22:45:54.185299', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:54.243639', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.08013777434825897, 'timestamp': '2025-09-30 22:45:54.246802', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:54.320314', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.0216353889554739, 'timestamp': '2025-09-30 22:45:54.323911', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.382810', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.11545375734567642, 'timestamp': '2025-09-30 22:45:54.388406', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.463278', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.1077839657664299, 'timestamp': '2025-09-30 22:45:54.470438', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.543395', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.0755615085363388, 'timestamp': '2025-09-30 22:45:54.548205', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:54.608213', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.1006748154759407, 'timestamp': '2025-09-30 22:45:54.614623', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.673882', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.13573555648326874, 'timestamp': '2025-09-30 22:45:54.677016', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:54.737668', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.06698115915060043, 'timestamp': '2025-09-30 22:45:54.746315', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:54.813542', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.023947300389409065, 'timestamp': '2025-09-30 22:45:54.818390', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:54.898602', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.07552056759595871, 'timestamp': '2025-09-30 22:45:54.901642', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:54.969920', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.06720951199531555, 'timestamp': '2025-09-30 22:45:54.978910', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.048320', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.024979447945952415, 'timestamp': '2025-09-30 22:45:55.062302', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.127057', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.091090127825737, 'timestamp': '2025-09-30 22:45:55.130645', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.189781', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.0404626838862896, 'timestamp': '2025-09-30 22:45:55.204947', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.275827', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.08745647221803665, 'timestamp': '2025-09-30 22:45:55.291449', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.364826', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.10477714240550995, 'timestamp': '2025-09-30 22:45:55.379467', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:55.459297', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.14238795638084412, 'timestamp': '2025-09-30 22:45:55.468559', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.531507', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.09059732407331467, 'timestamp': '2025-09-30 22:45:55.538352', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:55.600203', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.08731301128864288, 'timestamp': '2025-09-30 22:45:55.607190', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:55.674834', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.06608474999666214, 'timestamp': '2025-09-30 22:45:55.690618', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:55.763109', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.0660649985074997, 'timestamp': '2025-09-30 22:45:55.766644', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:55.827179', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.08381767570972443, 'timestamp': '2025-09-30 22:45:55.840406', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:55.912306', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.08926248550415039, 'timestamp': '2025-09-30 22:45:55.917017', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:55.985342', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.15572066605091095, 'timestamp': '2025-09-30 22:45:55.992958', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:56.073952', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.1275436133146286, 'timestamp': '2025-09-30 22:45:56.084765', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:56.153233', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.08354847133159637, 'timestamp': '2025-09-30 22:45:56.164721', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:56.231996', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.043072156608104706, 'timestamp': '2025-09-30 22:45:56.242687', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:56.301320', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.05927889049053192, 'timestamp': '2025-09-30 22:45:56.314270', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:56.380283', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.16881825029850006, 'timestamp': '2025-09-30 22:45:56.384498', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:56.460436', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.06469900161027908, 'timestamp': '2025-09-30 22:45:56.464742', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:56.540254', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.08260337263345718, 'timestamp': '2025-09-30 22:45:56.549042', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:56.617608', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.12550140917301178, 'timestamp': '2025-09-30 22:45:56.632882', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:56.698061', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.07511588931083679, 'timestamp': '2025-09-30 22:45:56.708562', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:56.785427', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.1331215649843216, 'timestamp': '2025-09-30 22:45:56.790418', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:56.859539', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.08268583565950394, 'timestamp': '2025-09-30 22:45:56.863351', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:56.931732', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.12090768665075302, 'timestamp': '2025-09-30 22:45:56.939921', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:57.012914', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.15417370200157166, 'timestamp': '2025-09-30 22:45:57.024326', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:57.098633', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.06482303142547607, 'timestamp': '2025-09-30 22:45:57.106870', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:57.168833', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.05611288920044899, 'timestamp': '2025-09-30 22:45:57.172453', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:57.236404', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.05121718347072601, 'timestamp': '2025-09-30 22:45:57.243797', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:57.315116', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.09909163415431976, 'timestamp': '2025-09-30 22:45:57.323138', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:57.401666', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.07934629917144775, 'timestamp': '2025-09-30 22:45:57.405750', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:57.463507', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.138717383146286, 'timestamp': '2025-09-30 22:45:57.467343', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:57.551781', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.04204876348376274, 'timestamp': '2025-09-30 22:45:57.558445', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:57.615828', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.1003977358341217, 'timestamp': '2025-09-30 22:45:57.634054', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:57.708089', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.06733972579240799, 'timestamp': '2025-09-30 22:45:57.712465', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:57.788889', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.016743365675210953, 'timestamp': '2025-09-30 22:45:57.793056', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:57.861524', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.03144637867808342, 'timestamp': '2025-09-30 22:45:57.883768', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:57.961927', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.07940781116485596, 'timestamp': '2025-09-30 22:45:57.967959', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:58.033991', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.08098460733890533, 'timestamp': '2025-09-30 22:45:58.038661', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:58.100694', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.04642339050769806, 'timestamp': '2025-09-30 22:45:58.105489', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:58.175844', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.11269962787628174, 'timestamp': '2025-09-30 22:45:58.184413', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:58.257030', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.060232289135456085, 'timestamp': '2025-09-30 22:45:58.265252', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:58.328814', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.08779920637607574, 'timestamp': '2025-09-30 22:45:58.341379', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:45:58.410141', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.05477255582809448, 'timestamp': '2025-09-30 22:45:58.415105', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 4160025321280.0}, 'timestamp': '2025-09-30 22:45:58.476415', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.05655544623732567, 'timestamp': '2025-09-30 22:45:58.483009', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:58.541311', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.11473261564970016, 'timestamp': '2025-09-30 22:45:58.553148', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:58.616088', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.07008474320173264, 'timestamp': '2025-09-30 22:45:58.619269', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:58.681807', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.12965723872184753, 'timestamp': '2025-09-30 22:45:58.687978', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:58.751337', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.08584582060575485, 'timestamp': '2025-09-30 22:45:58.758596', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:58.835820', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.10481438785791397, 'timestamp': '2025-09-30 22:45:58.838574', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:58.902960', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.10409468412399292, 'timestamp': '2025-09-30 22:45:58.906329', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:45:58.963607', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.12552006542682648, 'timestamp': '2025-09-30 22:45:58.966460', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:45:59.028455', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.09467780590057373, 'timestamp': '2025-09-30 22:45:59.043579', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:59.101735', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.1322198063135147, 'timestamp': '2025-09-30 22:45:59.105349', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.173903', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.09740806370973587, 'timestamp': '2025-09-30 22:45:59.179885', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:59.258109', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.13029049336910248, 'timestamp': '2025-09-30 22:45:59.262756', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:59.325257', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.14986827969551086, 'timestamp': '2025-09-30 22:45:59.346968', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.409061', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.08660342544317245, 'timestamp': '2025-09-30 22:45:59.416322', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:59.476371', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.07809937000274658, 'timestamp': '2025-09-30 22:45:59.490058', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.548223', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.03802318125963211, 'timestamp': '2025-09-30 22:45:59.553282', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:45:59.635894', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.08308197557926178, 'timestamp': '2025-09-30 22:45:59.643536', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:45:59.709959', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.046895600855350494, 'timestamp': '2025-09-30 22:45:59.714220', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.781212', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.10818014293909073, 'timestamp': '2025-09-30 22:45:59.786430', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.846793', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.07035849243402481, 'timestamp': '2025-09-30 22:45:59.860357', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:45:59.925857', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.04184826835989952, 'timestamp': '2025-09-30 22:45:59.936589', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:45:59.992861', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.07710219919681549, 'timestamp': '2025-09-30 22:45:59.995750', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.066783', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.11755482852458954, 'timestamp': '2025-09-30 22:46:00.069447', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:00.137407', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.15308691561222076, 'timestamp': '2025-09-30 22:46:00.141346', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:00.214566', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.07337912172079086, 'timestamp': '2025-09-30 22:46:00.222880', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.282304', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.09305864572525024, 'timestamp': '2025-09-30 22:46:00.287472', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:00.353675', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.10122993588447571, 'timestamp': '2025-09-30 22:46:00.358932', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:00.421102', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.14820297062397003, 'timestamp': '2025-09-30 22:46:00.424881', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:00.509589', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.14664936065673828, 'timestamp': '2025-09-30 22:46:00.516574', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.574649', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.09747016429901123, 'timestamp': '2025-09-30 22:46:00.578638', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:00.636598', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.08453977108001709, 'timestamp': '2025-09-30 22:46:00.639538', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.701915', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.10833252221345901, 'timestamp': '2025-09-30 22:46:00.707297', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.778995', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.08846688270568848, 'timestamp': '2025-09-30 22:46:00.788368', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.849629', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.1067497730255127, 'timestamp': '2025-09-30 22:46:00.853460', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:00.911095', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.05605484917759895, 'timestamp': '2025-09-30 22:46:00.914713', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:00.991483', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.1491537094116211, 'timestamp': '2025-09-30 22:46:00.994841', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:01.054205', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.05307526886463165, 'timestamp': '2025-09-30 22:46:01.060819', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.122821', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.07158178836107254, 'timestamp': '2025-09-30 22:46:01.126256', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.187793', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.1162673830986023, 'timestamp': '2025-09-30 22:46:01.191494', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.262733', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.11667202413082123, 'timestamp': '2025-09-30 22:46:01.267989', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:01.342961', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.03436756879091263, 'timestamp': '2025-09-30 22:46:01.350999', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:01.441519', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.11331483721733093, 'timestamp': '2025-09-30 22:46:01.445174', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:01.529894', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.09681521356105804, 'timestamp': '2025-09-30 22:46:01.533912', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:46:01.592566', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.06523972004652023, 'timestamp': '2025-09-30 22:46:01.596564', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.656131', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.07686113566160202, 'timestamp': '2025-09-30 22:46:01.663199', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.720944', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.08510437607765198, 'timestamp': '2025-09-30 22:46:01.735076', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.813145', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.06043660640716553, 'timestamp': '2025-09-30 22:46:01.820203', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:01.880324', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.050182800740003586, 'timestamp': '2025-09-30 22:46:01.883348', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:01.954431', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.14422182738780975, 'timestamp': '2025-09-30 22:46:01.961148', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.020524', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.0819244533777237, 'timestamp': '2025-09-30 22:46:02.024725', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.086151', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.08269453048706055, 'timestamp': '2025-09-30 22:46:02.090751', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.162799', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.07983127236366272, 'timestamp': '2025-09-30 22:46:02.166996', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.231281', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.09652768075466156, 'timestamp': '2025-09-30 22:46:02.240338', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.308001', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.05279466509819031, 'timestamp': '2025-09-30 22:46:02.311327', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.369384', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.07732605934143066, 'timestamp': '2025-09-30 22:46:02.372997', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.458959', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.05212898552417755, 'timestamp': '2025-09-30 22:46:02.469866', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.530777', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.08027492463588715, 'timestamp': '2025-09-30 22:46:02.538183', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.597658', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.10535567998886108, 'timestamp': '2025-09-30 22:46:02.601561', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:02.659867', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.08815164119005203, 'timestamp': '2025-09-30 22:46:02.668242', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.732458', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.07169847190380096, 'timestamp': '2025-09-30 22:46:02.736095', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:02.794348', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.0864887461066246, 'timestamp': '2025-09-30 22:46:02.801196', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.863366', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.07172172516584396, 'timestamp': '2025-09-30 22:46:02.868759', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:02.944079', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.09933611005544662, 'timestamp': '2025-09-30 22:46:02.949173', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:03.009742', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.12174047529697418, 'timestamp': '2025-09-30 22:46:03.015140', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:03.093993', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.18331177532672882, 'timestamp': '2025-09-30 22:46:03.101498', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:03.185116', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.044564493000507355, 'timestamp': '2025-09-30 22:46:03.189546', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:03.278562', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.09147579967975616, 'timestamp': '2025-09-30 22:46:03.283374', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:03.356964', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.10036986321210861, 'timestamp': '2025-09-30 22:46:03.378570', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:03.449964', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.07539842277765274, 'timestamp': '2025-09-30 22:46:03.458466', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:03.563101', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.13306286931037903, 'timestamp': '2025-09-30 22:46:03.566836', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:03.645002', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.041999898850917816, 'timestamp': '2025-09-30 22:46:03.662537', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:03.725881', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.07307974249124527, 'timestamp': '2025-09-30 22:46:03.728890', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:03.818081', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.04973294585943222, 'timestamp': '2025-09-30 22:46:03.826471', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:03.903247', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.09801523387432098, 'timestamp': '2025-09-30 22:46:03.908571', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:03.989270', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.0914529412984848, 'timestamp': '2025-09-30 22:46:03.994533', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:04.082743', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.06246897950768471, 'timestamp': '2025-09-30 22:46:04.089127', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:04.197351', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.05433963984251022, 'timestamp': '2025-09-30 22:46:04.221266', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:04.296340', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.06234598532319069, 'timestamp': '2025-09-30 22:46:04.304580', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:04.375257', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.11321339011192322, 'timestamp': '2025-09-30 22:46:04.380858', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:46:21.014904', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 7784.970201489714, 'timestamp': '2025-09-30 22:46:21.021373', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:21.081094', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.16134034097194672, 'timestamp': '2025-09-30 22:46:21.085693', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:21.148212', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.05592957139015198, 'timestamp': '2025-09-30 22:46:21.156715', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:21.230400', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.04084951803088188, 'timestamp': '2025-09-30 22:46:21.235275', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:21.317954', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.07301367074251175, 'timestamp': '2025-09-30 22:46:21.330848', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:21.406248', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.10257989168167114, 'timestamp': '2025-09-30 22:46:21.421908', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:21.483209', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.07746679335832596, 'timestamp': '2025-09-30 22:46:21.491052', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:21.552553', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.07024900615215302, 'timestamp': '2025-09-30 22:46:21.565875', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:21.633500', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.07556165754795074, 'timestamp': '2025-09-30 22:46:21.638269', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:21.711177', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.2123594731092453, 'timestamp': '2025-09-30 22:46:21.716020', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:21.786148', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.10436023026704788, 'timestamp': '2025-09-30 22:46:21.794702', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:21.878510', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.09054802358150482, 'timestamp': '2025-09-30 22:46:21.881941', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:21.955601', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.15531425178050995, 'timestamp': '2025-09-30 22:46:21.961662', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:22.041080', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.027779068797826767, 'timestamp': '2025-09-30 22:46:22.045416', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:22.116659', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.06288326531648636, 'timestamp': '2025-09-30 22:46:22.124027', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:22.184375', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.10463821142911911, 'timestamp': '2025-09-30 22:46:22.192450', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:22.255145', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.026229657232761383, 'timestamp': '2025-09-30 22:46:22.258864', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:22.325871', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.11698475480079651, 'timestamp': '2025-09-30 22:46:22.330641', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:22.403242', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.10519742965698242, 'timestamp': '2025-09-30 22:46:22.411520', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:22.471943', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.05032942816615105, 'timestamp': '2025-09-30 22:46:22.476522', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:22.536774', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.08126921951770782, 'timestamp': '2025-09-30 22:46:22.540996', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:22.616691', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.1422738879919052, 'timestamp': '2025-09-30 22:46:22.621240', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:22.685387', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.0686158835887909, 'timestamp': '2025-09-30 22:46:22.692907', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:22.762063', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.06001610308885574, 'timestamp': '2025-09-30 22:46:22.766571', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:22.829261', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.0788324624300003, 'timestamp': '2025-09-30 22:46:22.833506', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:22.894842', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.07651665061712265, 'timestamp': '2025-09-30 22:46:22.903397', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:22.980387', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.1357467919588089, 'timestamp': '2025-09-30 22:46:22.999880', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:23.061815', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.039520345628261566, 'timestamp': '2025-09-30 22:46:23.074335', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:23.142135', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.07503306865692139, 'timestamp': '2025-09-30 22:46:23.145679', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:23.207443', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.08117912709712982, 'timestamp': '2025-09-30 22:46:23.212271', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:23.275994', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.10230768471956253, 'timestamp': '2025-09-30 22:46:23.284681', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-09-30 22:46:23.746390', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:23.812971', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.14575202763080597, 'timestamp': '2025-09-30 22:46:23.817599', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:23.889425', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.06077240779995918, 'timestamp': '2025-09-30 22:46:23.893716', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:23.967378', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.10978919267654419, 'timestamp': '2025-09-30 22:46:23.970642', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.029076', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.07049882411956787, 'timestamp': '2025-09-30 22:46:24.038185', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:24.098045', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.09587975591421127, 'timestamp': '2025-09-30 22:46:24.102771', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.174248', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.10444547235965729, 'timestamp': '2025-09-30 22:46:24.178530', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:24.238619', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.14184431731700897, 'timestamp': '2025-09-30 22:46:24.242992', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:24.303546', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.11492779105901718, 'timestamp': '2025-09-30 22:46:24.312620', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.386953', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.029965024441480637, 'timestamp': '2025-09-30 22:46:24.391009', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.489233', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.08276695013046265, 'timestamp': '2025-09-30 22:46:24.503765', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:24.582647', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.10514354705810547, 'timestamp': '2025-09-30 22:46:24.596372', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:24.668693', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.10247094184160233, 'timestamp': '2025-09-30 22:46:24.676981', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.738793', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.11051115393638611, 'timestamp': '2025-09-30 22:46:24.744752', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.804075', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.06004941463470459, 'timestamp': '2025-09-30 22:46:24.809875', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.872307', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.12627768516540527, 'timestamp': '2025-09-30 22:46:24.875976', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:24.935746', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.10719530284404755, 'timestamp': '2025-09-30 22:46:24.942440', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:25.009502', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.11042992025613785, 'timestamp': '2025-09-30 22:46:25.013436', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:25.073860', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.08999495953321457, 'timestamp': '2025-09-30 22:46:25.079982', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:25.138923', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.07424823194742203, 'timestamp': '2025-09-30 22:46:25.142286', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:25.213679', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.052379634231328964, 'timestamp': '2025-09-30 22:46:25.221430', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:25.287502', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.09039407968521118, 'timestamp': '2025-09-30 22:46:25.291897', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:25.359031', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.039005063474178314, 'timestamp': '2025-09-30 22:46:25.363751', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:25.438788', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.10694202780723572, 'timestamp': '2025-09-30 22:46:25.447805', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:25.506577', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.04436579346656799, 'timestamp': '2025-09-30 22:46:25.514098', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:25.581738', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.08511865139007568, 'timestamp': '2025-09-30 22:46:25.586155', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:25.646274', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.06769493967294693, 'timestamp': '2025-09-30 22:46:25.650498', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:25.708197', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.15665636956691742, 'timestamp': '2025-09-30 22:46:25.711497', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:25.786349', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.11158110201358795, 'timestamp': '2025-09-30 22:46:25.792735', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:25.851644', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.11141715943813324, 'timestamp': '2025-09-30 22:46:25.854558', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:25.912710', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.07265039533376694, 'timestamp': '2025-09-30 22:46:25.916333', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:25.976455', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.09783667325973511, 'timestamp': '2025-09-30 22:46:25.980246', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:26.038611', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.10447429865598679, 'timestamp': '2025-09-30 22:46:26.056288', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.116734', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.07877767086029053, 'timestamp': '2025-09-30 22:46:26.131541', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.188644', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.1137704849243164, 'timestamp': '2025-09-30 22:46:26.192300', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:26.252123', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.05746658146381378, 'timestamp': '2025-09-30 22:46:26.258520', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.315888', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.119501993060112, 'timestamp': '2025-09-30 22:46:26.322975', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:26.381208', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.09370079636573792, 'timestamp': '2025-09-30 22:46:26.384652', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:26.444158', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.06979066133499146, 'timestamp': '2025-09-30 22:46:26.448307', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:26.509414', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.04852557182312012, 'timestamp': '2025-09-30 22:46:26.513122', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:26.581871', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.13242888450622559, 'timestamp': '2025-09-30 22:46:26.589161', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:26.655144', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.08803891390562057, 'timestamp': '2025-09-30 22:46:26.658365', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:26.717072', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.12587526440620422, 'timestamp': '2025-09-30 22:46:26.722145', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.786543', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.13570187985897064, 'timestamp': '2025-09-30 22:46:26.794648', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.861579', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.058732375502586365, 'timestamp': '2025-09-30 22:46:26.869109', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:26.927480', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.12983787059783936, 'timestamp': '2025-09-30 22:46:26.930992', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:26.989558', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.12862983345985413, 'timestamp': '2025-09-30 22:46:27.003032', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:27.061200', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.051986631006002426, 'timestamp': '2025-09-30 22:46:27.064622', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:27.123145', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.12341813743114471, 'timestamp': '2025-09-30 22:46:27.139818', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:27.197896', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.07031939923763275, 'timestamp': '2025-09-30 22:46:27.203762', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:27.273123', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.09027346223592758, 'timestamp': '2025-09-30 22:46:27.276439', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:27.341828', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.07445314526557922, 'timestamp': '2025-09-30 22:46:27.347734', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:27.411147', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.10433618724346161, 'timestamp': '2025-09-30 22:46:27.420374', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:27.479802', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.12263043224811554, 'timestamp': '2025-09-30 22:46:27.484383', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:27.544224', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.06699460744857788, 'timestamp': '2025-09-30 22:46:27.560797', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:27.624079', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.11468528211116791, 'timestamp': '2025-09-30 22:46:27.628903', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:27.690096', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.048965852707624435, 'timestamp': '2025-09-30 22:46:27.698914', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:27.760256', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.0913248211145401, 'timestamp': '2025-09-30 22:46:27.764489', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:27.827414', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.17195579409599304, 'timestamp': '2025-09-30 22:46:27.830145', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:27.900591', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.11573360860347748, 'timestamp': '2025-09-30 22:46:27.905921', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:27.964252', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.04090049862861633, 'timestamp': '2025-09-30 22:46:27.972107', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.042102', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.0653785839676857, 'timestamp': '2025-09-30 22:46:28.045784', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.106443', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.12998390197753906, 'timestamp': '2025-09-30 22:46:28.111977', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:28.171835', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.09589225798845291, 'timestamp': '2025-09-30 22:46:28.176233', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.246623', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.09181114286184311, 'timestamp': '2025-09-30 22:46:28.257960', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.316046', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.1521817445755005, 'timestamp': '2025-09-30 22:46:28.329254', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:28.398716', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.065445177257061, 'timestamp': '2025-09-30 22:46:28.411539', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:28.481943', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.10112275928258896, 'timestamp': '2025-09-30 22:46:28.486217', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.554549', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.08709868043661118, 'timestamp': '2025-09-30 22:46:28.564503', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:28.631291', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.1039544939994812, 'timestamp': '2025-09-30 22:46:28.645995', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:28.704207', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.1013236716389656, 'timestamp': '2025-09-30 22:46:28.711291', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:28.771074', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.07897475361824036, 'timestamp': '2025-09-30 22:46:28.775709', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:28.835454', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.06813494861125946, 'timestamp': '2025-09-30 22:46:28.853546', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:28.923046', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.16497129201889038, 'timestamp': '2025-09-30 22:46:28.927244', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:28.998534', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.08831862360239029, 'timestamp': '2025-09-30 22:46:29.003416', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:29.065424', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.0609874427318573, 'timestamp': '2025-09-30 22:46:29.070481', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.130685', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.14066137373447418, 'timestamp': '2025-09-30 22:46:29.142445', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:29.201830', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.07232192158699036, 'timestamp': '2025-09-30 22:46:29.206851', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:29.265098', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.15080220997333527, 'timestamp': '2025-09-30 22:46:29.269759', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:29.332889', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.0702032744884491, 'timestamp': '2025-09-30 22:46:29.337644', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.403642', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.048439256846904755, 'timestamp': '2025-09-30 22:46:29.412427', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.484695', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.05649404972791672, 'timestamp': '2025-09-30 22:46:29.492965', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.560415', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.09304983913898468, 'timestamp': '2025-09-30 22:46:29.574090', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:29.635083', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.06348972022533417, 'timestamp': '2025-09-30 22:46:29.639303', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.700364', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.09404974430799484, 'timestamp': '2025-09-30 22:46:29.707314', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:29.765853', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.06368175894021988, 'timestamp': '2025-09-30 22:46:29.769768', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:29.837885', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.07965052127838135, 'timestamp': '2025-09-30 22:46:29.841230', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:29.900546', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.10901133716106415, 'timestamp': '2025-09-30 22:46:29.914266', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:29.999552', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.0761251375079155, 'timestamp': '2025-09-30 22:46:30.007053', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.077744', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.07265430688858032, 'timestamp': '2025-09-30 22:46:30.082567', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.140335', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.06019319221377373, 'timestamp': '2025-09-30 22:46:30.143969', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:30.201730', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.09892706573009491, 'timestamp': '2025-09-30 22:46:30.205239', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.266747', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.08095256239175797, 'timestamp': '2025-09-30 22:46:30.273509', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.331791', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.01852606236934662, 'timestamp': '2025-09-30 22:46:30.335904', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.393803', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.1305622160434723, 'timestamp': '2025-09-30 22:46:30.397179', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.455640', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.059341248124837875, 'timestamp': '2025-09-30 22:46:30.459751', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.517914', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.08330284059047699, 'timestamp': '2025-09-30 22:46:30.525672', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.597565', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.11154478788375854, 'timestamp': '2025-09-30 22:46:30.601391', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:30.667077', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.1208174005150795, 'timestamp': '2025-09-30 22:46:30.670293', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.733983', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.05336271971464157, 'timestamp': '2025-09-30 22:46:30.737640', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.797607', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.12753523886203766, 'timestamp': '2025-09-30 22:46:30.805125', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:30.862236', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.04429884999990463, 'timestamp': '2025-09-30 22:46:30.873493', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:30.932241', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.07752146571874619, 'timestamp': '2025-09-30 22:46:30.935277', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.006808', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.13709864020347595, 'timestamp': '2025-09-30 22:46:31.010871', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.075276', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.07272882014513016, 'timestamp': '2025-09-30 22:46:31.082617', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:31.140570', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.07459638267755508, 'timestamp': '2025-09-30 22:46:31.144242', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:31.203219', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.054107483476400375, 'timestamp': '2025-09-30 22:46:31.207428', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.269301', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.08842463791370392, 'timestamp': '2025-09-30 22:46:31.273649', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.336066', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.06947324424982071, 'timestamp': '2025-09-30 22:46:31.342687', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:31.410958', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.026036247611045837, 'timestamp': '2025-09-30 22:46:31.418651', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:31.498771', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.14908385276794434, 'timestamp': '2025-09-30 22:46:31.503455', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:31.571226', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.05566701665520668, 'timestamp': '2025-09-30 22:46:31.582179', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:31.652083', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.15049657225608826, 'timestamp': '2025-09-30 22:46:31.665034', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:31.732668', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.13868649303913116, 'timestamp': '2025-09-30 22:46:31.744978', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.803344', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.158858522772789, 'timestamp': '2025-09-30 22:46:31.812774', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:31.878521', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.07169389724731445, 'timestamp': '2025-09-30 22:46:31.881447', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:31.958922', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.05083758756518364, 'timestamp': '2025-09-30 22:46:31.966125', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:32.031530', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.10347054153680801, 'timestamp': '2025-09-30 22:46:32.039747', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:32.099529', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.07141858339309692, 'timestamp': '2025-09-30 22:46:32.108391', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:32.168902', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.08155810087919235, 'timestamp': '2025-09-30 22:46:32.173144', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:32.238275', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.062244314700365067, 'timestamp': '2025-09-30 22:46:32.253084', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:32.318826', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.16219159960746765, 'timestamp': '2025-09-30 22:46:32.328705', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:32.394236', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.09961175173521042, 'timestamp': '2025-09-30 22:46:32.396991', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:32.455765', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.07735466957092285, 'timestamp': '2025-09-30 22:46:32.471041', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:32.536316', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.12519626319408417, 'timestamp': '2025-09-30 22:46:32.546001', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:32.640380', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.08296230435371399, 'timestamp': '2025-09-30 22:46:32.649458', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:32.721380', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.15566587448120117, 'timestamp': '2025-09-30 22:46:32.725748', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:32.788269', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.08915875852108002, 'timestamp': '2025-09-30 22:46:32.791289', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:32.851348', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.052163880318403244, 'timestamp': '2025-09-30 22:46:32.859072', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:32.918045', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.1012735590338707, 'timestamp': '2025-09-30 22:46:32.921009', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:33.004267', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.13765718042850494, 'timestamp': '2025-09-30 22:46:33.015762', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.075512', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.12675143778324127, 'timestamp': '2025-09-30 22:46:33.085666', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:33.143563', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.09736935794353485, 'timestamp': '2025-09-30 22:46:33.159252', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.224065', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.14729149639606476, 'timestamp': '2025-09-30 22:46:33.227326', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:33.300219', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.08311115950345993, 'timestamp': '2025-09-30 22:46:33.310063', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.367154', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.04122733697295189, 'timestamp': '2025-09-30 22:46:33.381040', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:33.447323', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.0709739401936531, 'timestamp': '2025-09-30 22:46:33.454296', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.511141', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.0448404923081398, 'timestamp': '2025-09-30 22:46:33.514972', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:33.585690', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.15174463391304016, 'timestamp': '2025-09-30 22:46:33.589779', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:33.663655', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.13048133254051208, 'timestamp': '2025-09-30 22:46:33.668855', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:33.730907', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.12518779933452606, 'timestamp': '2025-09-30 22:46:33.743250', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.800181', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.037812843918800354, 'timestamp': '2025-09-30 22:46:33.804043', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:33.881327', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.07095081359148026, 'timestamp': '2025-09-30 22:46:33.885271', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:33.958311', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.07174143195152283, 'timestamp': '2025-09-30 22:46:33.963138', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.024068', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.07883334159851074, 'timestamp': '2025-09-30 22:46:34.038912', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:34.097080', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.10679762065410614, 'timestamp': '2025-09-30 22:46:34.100969', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:34.160065', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.08241625875234604, 'timestamp': '2025-09-30 22:46:34.163027', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.221596', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.10121054202318192, 'timestamp': '2025-09-30 22:46:34.224757', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:34.284546', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.14688421785831451, 'timestamp': '2025-09-30 22:46:34.291938', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.350831', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.07623784244060516, 'timestamp': '2025-09-30 22:46:34.355258', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.415074', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.08917801082134247, 'timestamp': '2025-09-30 22:46:34.418905', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:34.486397', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.04835163429379463, 'timestamp': '2025-09-30 22:46:34.495738', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:34.555432', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.1583113819360733, 'timestamp': '2025-09-30 22:46:34.566841', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.639523', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.08080174773931503, 'timestamp': '2025-09-30 22:46:34.643027', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.704246', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.11136513948440552, 'timestamp': '2025-09-30 22:46:34.707299', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:34.770406', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.09066123515367508, 'timestamp': '2025-09-30 22:46:34.772955', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:34.835879', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.07980452477931976, 'timestamp': '2025-09-30 22:46:34.841733', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:34.903084', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.06670180708169937, 'timestamp': '2025-09-30 22:46:34.910620', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:34.968344', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.06526503711938858, 'timestamp': '2025-09-30 22:46:34.976435', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:35.037120', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.1046898290514946, 'timestamp': '2025-09-30 22:46:35.039927', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:35.107258', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.11001697182655334, 'timestamp': '2025-09-30 22:46:35.113878', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:35.172058', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.08165644854307175, 'timestamp': '2025-09-30 22:46:35.178760', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:35.236899', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.17438077926635742, 'timestamp': '2025-09-30 22:46:35.240499', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:35.300206', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.07838697731494904, 'timestamp': '2025-09-30 22:46:35.306751', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:35.366860', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.09624842554330826, 'timestamp': '2025-09-30 22:46:35.373992', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:35.444851', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.06967435777187347, 'timestamp': '2025-09-30 22:46:35.450426', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:35.527567', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.03417058289051056, 'timestamp': '2025-09-30 22:46:35.530342', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:35.592659', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.04719981923699379, 'timestamp': '2025-09-30 22:46:35.595446', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:35.665557', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.04392341896891594, 'timestamp': '2025-09-30 22:46:35.678173', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:35.738882', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.10173901915550232, 'timestamp': '2025-09-30 22:46:35.745251', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:35.819960', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.16589738428592682, 'timestamp': '2025-09-30 22:46:35.822590', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:35.882430', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.07653971016407013, 'timestamp': '2025-09-30 22:46:35.885411', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:35.959829', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.08908825367689133, 'timestamp': '2025-09-30 22:46:35.978351', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:36.054442', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.04365032911300659, 'timestamp': '2025-09-30 22:46:36.060867', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:36.120745', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.18860766291618347, 'timestamp': '2025-09-30 22:46:36.128126', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:36.187125', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.08634067326784134, 'timestamp': '2025-09-30 22:46:36.189790', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:46:36.255577', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.06421733647584915, 'timestamp': '2025-09-30 22:46:36.264576', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:36.334448', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.10038390010595322, 'timestamp': '2025-09-30 22:46:36.337545', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:36.397080', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.1059441864490509, 'timestamp': '2025-09-30 22:46:36.400513', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:46:36.470811', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.17709116637706757, 'timestamp': '2025-09-30 22:46:36.479472', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:36.544873', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.10458426922559738, 'timestamp': '2025-09-30 22:46:36.557915', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:36.632656', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.03238103166222572, 'timestamp': '2025-09-30 22:46:36.642313', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:36.704392', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.030977336689829826, 'timestamp': '2025-09-30 22:46:36.707385', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:36.772732', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.10402590036392212, 'timestamp': '2025-09-30 22:46:36.776501', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:36.855108', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.15194889903068542, 'timestamp': '2025-09-30 22:46:36.863792', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:36.926303', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.06944103538990021, 'timestamp': '2025-09-30 22:46:36.929996', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:37.009374', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.11029305309057236, 'timestamp': '2025-09-30 22:46:37.014229', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.083952', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.06845361739397049, 'timestamp': '2025-09-30 22:46:37.087099', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.153368', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.11402678489685059, 'timestamp': '2025-09-30 22:46:37.160207', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.230249', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.07595900446176529, 'timestamp': '2025-09-30 22:46:37.235380', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:37.303594', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.12040271610021591, 'timestamp': '2025-09-30 22:46:37.308280', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:37.372087', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.09785163402557373, 'timestamp': '2025-09-30 22:46:37.376442', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.449307', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.09503873437643051, 'timestamp': '2025-09-30 22:46:37.463194', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:37.530829', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.05915914103388786, 'timestamp': '2025-09-30 22:46:37.535743', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:37.600087', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.14255531132221222, 'timestamp': '2025-09-30 22:46:37.605876', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:37.675833', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.05095547437667847, 'timestamp': '2025-09-30 22:46:37.680622', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.752813', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.1794869601726532, 'timestamp': '2025-09-30 22:46:37.760205', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:37.819148', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.026604263111948967, 'timestamp': '2025-09-30 22:46:37.824052', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:37.890034', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.11641501635313034, 'timestamp': '2025-09-30 22:46:37.892872', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:37.954735', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.09594141691923141, 'timestamp': '2025-09-30 22:46:37.957581', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:38.023829', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.0878496915102005, 'timestamp': '2025-09-30 22:46:38.029633', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:38.092159', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.10829128324985504, 'timestamp': '2025-09-30 22:46:38.095185', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:38.156075', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.01897529885172844, 'timestamp': '2025-09-30 22:46:38.161065', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:38.252166', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.14973080158233643, 'timestamp': '2025-09-30 22:46:38.262011', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:38.322319', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.08060453832149506, 'timestamp': '2025-09-30 22:46:38.328778', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:38.390937', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.10967127233743668, 'timestamp': '2025-09-30 22:46:38.395384', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:38.471921', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.09594804793596268, 'timestamp': '2025-09-30 22:46:38.475468', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:38.535388', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.07093419134616852, 'timestamp': '2025-09-30 22:46:38.539132', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:38.599155', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.11769118160009384, 'timestamp': '2025-09-30 22:46:38.616075', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:38.681703', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.0728527083992958, 'timestamp': '2025-09-30 22:46:38.686906', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:38.752337', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.1275024116039276, 'timestamp': '2025-09-30 22:46:38.759182', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:38.829657', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.13449320197105408, 'timestamp': '2025-09-30 22:46:38.835311', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:38.908589', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.19845061004161835, 'timestamp': '2025-09-30 22:46:38.916906', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:38.974538', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.031232384964823723, 'timestamp': '2025-09-30 22:46:38.988989', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:39.055652', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.17595940828323364, 'timestamp': '2025-09-30 22:46:39.069586', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:39.135365', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.11803989857435226, 'timestamp': '2025-09-30 22:46:39.140928', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:39.202774', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.05619879439473152, 'timestamp': '2025-09-30 22:46:39.214772', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:39.274363', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.05336298048496246, 'timestamp': '2025-09-30 22:46:39.278019', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:39.337400', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.10193170607089996, 'timestamp': '2025-09-30 22:46:39.343613', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:39.403141', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.0914585143327713, 'timestamp': '2025-09-30 22:46:39.407249', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:39.466387', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.13466081023216248, 'timestamp': '2025-09-30 22:46:39.475216', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:39.533267', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.08638904243707657, 'timestamp': '2025-09-30 22:46:39.538749', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:39.603578', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.06411129236221313, 'timestamp': '2025-09-30 22:46:39.608194', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:39.678668', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.08226673305034637, 'timestamp': '2025-09-30 22:46:39.693343', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:39.759737', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.050276897847652435, 'timestamp': '2025-09-30 22:46:39.770641', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:39.831985', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.10794772952795029, 'timestamp': '2025-09-30 22:46:39.838644', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:39.909608', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.08393755555152893, 'timestamp': '2025-09-30 22:46:39.915079', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:39.989976', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.18606695532798767, 'timestamp': '2025-09-30 22:46:39.994228', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:46:40.055731', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.041889339685440063, 'timestamp': '2025-09-30 22:46:40.062670', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:40.126248', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.05868120864033699, 'timestamp': '2025-09-30 22:46:40.139582', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:40.204848', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.09070215374231339, 'timestamp': '2025-09-30 22:46:40.208298', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.269052', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.10876169800758362, 'timestamp': '2025-09-30 22:46:40.274760', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.333458', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.030885115265846252, 'timestamp': '2025-09-30 22:46:40.340662', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:40.402544', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.0957309827208519, 'timestamp': '2025-09-30 22:46:40.406177', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:40.464211', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.08885858207941055, 'timestamp': '2025-09-30 22:46:40.468299', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.537860', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.178945854306221, 'timestamp': '2025-09-30 22:46:40.543412', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:40.604768', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.11389816552400589, 'timestamp': '2025-09-30 22:46:40.613702', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.672951', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.16492313146591187, 'timestamp': '2025-09-30 22:46:40.677187', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:40.736668', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.21954070031642914, 'timestamp': '2025-09-30 22:46:40.740489', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.804382', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.1604508012533188, 'timestamp': '2025-09-30 22:46:40.809532', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:40.884310', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.08513309806585312, 'timestamp': '2025-09-30 22:46:40.900337', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:40.967193', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.11426453292369843, 'timestamp': '2025-09-30 22:46:40.972229', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:41.034710', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.19037987291812897, 'timestamp': '2025-09-30 22:46:41.038999', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:41.097593', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.07927342504262924, 'timestamp': '2025-09-30 22:46:41.100897', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.173770', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.10723642259836197, 'timestamp': '2025-09-30 22:46:41.180826', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:41.238021', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.08664540946483612, 'timestamp': '2025-09-30 22:46:41.241958', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:41.302973', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.05532176047563553, 'timestamp': '2025-09-30 22:46:41.308001', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:41.368620', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.04230577498674393, 'timestamp': '2025-09-30 22:46:41.373245', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.441418', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.05352197214961052, 'timestamp': '2025-09-30 22:46:41.450203', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:41.521912', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.12483163177967072, 'timestamp': '2025-09-30 22:46:41.525030', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.584261', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.10939878970384598, 'timestamp': '2025-09-30 22:46:41.588451', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.656331', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.13813503086566925, 'timestamp': '2025-09-30 22:46:41.669871', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.734717', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.05952295660972595, 'timestamp': '2025-09-30 22:46:41.743180', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:41.802724', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.05409615859389305, 'timestamp': '2025-09-30 22:46:41.806453', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:41.881692', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.13328714668750763, 'timestamp': '2025-09-30 22:46:41.886576', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:41.947883', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.10254602134227753, 'timestamp': '2025-09-30 22:46:41.952878', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:42.015154', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.17180973291397095, 'timestamp': '2025-09-30 22:46:42.023547', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:42.083811', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.084284707903862, 'timestamp': '2025-09-30 22:46:42.093041', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:42.157287', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.16056673228740692, 'timestamp': '2025-09-30 22:46:42.167681', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:42.229470', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.08230730891227722, 'timestamp': '2025-09-30 22:46:42.233889', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:42.294479', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.07871219515800476, 'timestamp': '2025-09-30 22:46:42.302476', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:42.376960', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.08977356553077698, 'timestamp': '2025-09-30 22:46:42.382777', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:42.460522', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.12019170820713043, 'timestamp': '2025-09-30 22:46:42.468349', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:42.530885', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.14133451879024506, 'timestamp': '2025-09-30 22:46:42.534859', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:42.619689', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.06984799355268478, 'timestamp': '2025-09-30 22:46:42.626718', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:42.690212', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.1287580281496048, 'timestamp': '2025-09-30 22:46:42.698224', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:42.773744', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.05069584399461746, 'timestamp': '2025-09-30 22:46:42.777407', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:42.848913', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.09458131343126297, 'timestamp': '2025-09-30 22:46:42.856183', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:42.934216', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.07112350314855576, 'timestamp': '2025-09-30 22:46:42.944076', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:43.016672', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.07126173377037048, 'timestamp': '2025-09-30 22:46:43.025033', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:43.113018', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.11828219890594482, 'timestamp': '2025-09-30 22:46:43.118686', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:43.193990', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.04161125794053078, 'timestamp': '2025-09-30 22:46:43.198423', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:43.280641', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.06847108900547028, 'timestamp': '2025-09-30 22:46:43.289990', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:43.357690', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.07181142270565033, 'timestamp': '2025-09-30 22:46:43.364388', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:43.444195', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.11480791866779327, 'timestamp': '2025-09-30 22:46:43.450302', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:43.532753', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.15542824566364288, 'timestamp': '2025-09-30 22:46:43.538432', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:43.627240', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.10741309821605682, 'timestamp': '2025-09-30 22:46:43.634593', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:43.703349', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.07815240323543549, 'timestamp': '2025-09-30 22:46:43.708000', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:43.782946', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.14664378762245178, 'timestamp': '2025-09-30 22:46:43.787076', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:43.852172', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.09658203274011612, 'timestamp': '2025-09-30 22:46:43.858155', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:43.918269', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.1832200586795807, 'timestamp': '2025-09-30 22:46:43.925940', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:43.995963', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.057124730199575424, 'timestamp': '2025-09-30 22:46:43.999628', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:44.063362', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.07563440501689911, 'timestamp': '2025-09-30 22:46:44.069358', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:44.142270', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.10534591972827911, 'timestamp': '2025-09-30 22:46:44.149756', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:44.220265', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.13775581121444702, 'timestamp': '2025-09-30 22:46:44.227236', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:44.288453', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.06512747704982758, 'timestamp': '2025-09-30 22:46:44.291905', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:44.367082', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.05518289655447006, 'timestamp': '2025-09-30 22:46:44.370901', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:44.442953', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.08297524601221085, 'timestamp': '2025-09-30 22:46:44.447076', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:44.525253', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.08937235921621323, 'timestamp': '2025-09-30 22:46:44.532899', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:44.599079', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.10626639425754547, 'timestamp': '2025-09-30 22:46:44.603510', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:44.663515', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.1452159434556961, 'timestamp': '2025-09-30 22:46:44.668633', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:44.730725', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.101492740213871, 'timestamp': '2025-09-30 22:46:44.734262', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:44.794694', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.0634656697511673, 'timestamp': '2025-09-30 22:46:44.811126', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:44.873167', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.09311783313751221, 'timestamp': '2025-09-30 22:46:44.875943', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:44.935089', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.06031108275055885, 'timestamp': '2025-09-30 22:46:44.938641', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:45.004498', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.08589779585599899, 'timestamp': '2025-09-30 22:46:45.008410', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:45.075448', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.11208466440439224, 'timestamp': '2025-09-30 22:46:45.083894', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:45.149110', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.057003654539585114, 'timestamp': '2025-09-30 22:46:45.153333', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:45.217233', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.09042567759752274, 'timestamp': '2025-09-30 22:46:45.221373', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:45.281378', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.09586536139249802, 'timestamp': '2025-09-30 22:46:45.285503', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:45.346581', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.10050079226493835, 'timestamp': '2025-09-30 22:46:45.355200', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:45.418357', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.05091249197721481, 'timestamp': '2025-09-30 22:46:45.422271', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:45.480748', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.09201747179031372, 'timestamp': '2025-09-30 22:46:45.491023', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:45.549173', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.05141554772853851, 'timestamp': '2025-09-30 22:46:45.551813', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:45.612798', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.07563840597867966, 'timestamp': '2025-09-30 22:46:45.627611', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:45.685791', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.08682917058467865, 'timestamp': '2025-09-30 22:46:45.690003', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:45.752976', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.16202712059020996, 'timestamp': '2025-09-30 22:46:45.757089', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:45.816207', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.13481885194778442, 'timestamp': '2025-09-30 22:46:45.820633', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:45.893506', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.16815303266048431, 'timestamp': '2025-09-30 22:46:45.900743', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:45.960078', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.10473689436912537, 'timestamp': '2025-09-30 22:46:45.964100', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:46.024241', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.0657324567437172, 'timestamp': '2025-09-30 22:46:46.027332', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.102509', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.05647795647382736, 'timestamp': '2025-09-30 22:46:46.107209', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.181834', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.13194149732589722, 'timestamp': '2025-09-30 22:46:46.190177', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.250259', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.0748293399810791, 'timestamp': '2025-09-30 22:46:46.252981', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.323876', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.13707390427589417, 'timestamp': '2025-09-30 22:46:46.327548', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.386423', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.06840846687555313, 'timestamp': '2025-09-30 22:46:46.389533', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:46.461054', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.1464087814092636, 'timestamp': '2025-09-30 22:46:46.469152', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.528168', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.050760794430971146, 'timestamp': '2025-09-30 22:46:46.531873', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.603187', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.11653831601142883, 'timestamp': '2025-09-30 22:46:46.606953', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:46.667243', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.11853723973035812, 'timestamp': '2025-09-30 22:46:46.671146', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.730751', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.06932615488767624, 'timestamp': '2025-09-30 22:46:46.737662', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.810786', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.12239739298820496, 'timestamp': '2025-09-30 22:46:46.814568', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.887209', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.1459464132785797, 'timestamp': '2025-09-30 22:46:46.891794', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:46.953294', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.13365468382835388, 'timestamp': '2025-09-30 22:46:46.958130', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:47.028823', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.11239577829837799, 'timestamp': '2025-09-30 22:46:47.035625', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:47.115624', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.14027784764766693, 'timestamp': '2025-09-30 22:46:47.120357', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:47.193146', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.05012389272451401, 'timestamp': '2025-09-30 22:46:47.196911', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:47.272182', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.1320636123418808, 'timestamp': '2025-09-30 22:46:47.283336', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:47.350236', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.040805842727422714, 'timestamp': '2025-09-30 22:46:47.364238', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:47.430222', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.04420037940144539, 'timestamp': '2025-09-30 22:46:47.434076', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:47.494849', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.09306550025939941, 'timestamp': '2025-09-30 22:46:47.506451', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:47.564355', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.029569845646619797, 'timestamp': '2025-09-30 22:46:47.567683', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:47.634660', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.04314703866839409, 'timestamp': '2025-09-30 22:46:47.648994', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:46:47.710764', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.06092390790581703, 'timestamp': '2025-09-30 22:46:47.713932', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:47.772435', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.15114250779151917, 'timestamp': '2025-09-30 22:46:47.775818', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:47.846850', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.08279269933700562, 'timestamp': '2025-09-30 22:46:47.851142', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:47.914603', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.06981558352708817, 'timestamp': '2025-09-30 22:46:47.922764', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:47.981350', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.05261370167136192, 'timestamp': '2025-09-30 22:46:47.985366', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.042933', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.04159180819988251, 'timestamp': '2025-09-30 22:46:48.046873', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.105766', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.128729909658432, 'timestamp': '2025-09-30 22:46:48.110355', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.171354', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.07558954507112503, 'timestamp': '2025-09-30 22:46:48.180516', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.257258', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.048808302730321884, 'timestamp': '2025-09-30 22:46:48.268543', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.329768', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.15209302306175232, 'timestamp': '2025-09-30 22:46:48.334566', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:48.398375', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.08365409821271896, 'timestamp': '2025-09-30 22:46:48.401035', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:48.471654', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.075518399477005, 'timestamp': '2025-09-30 22:46:48.479428', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:48.545844', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.044436052441596985, 'timestamp': '2025-09-30 22:46:48.548990', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:48.607406', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.0406356044113636, 'timestamp': '2025-09-30 22:46:48.611939', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:48.681070', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.074568971991539, 'timestamp': '2025-09-30 22:46:48.694028', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:48.763793', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.0922446995973587, 'timestamp': '2025-09-30 22:46:48.771320', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.830978', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.05700771138072014, 'timestamp': '2025-09-30 22:46:48.835062', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:48.897100', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.1331648826599121, 'timestamp': '2025-09-30 22:46:48.900555', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:48.960385', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.08259134739637375, 'timestamp': '2025-09-30 22:46:48.974262', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:49.034257', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.08366598933935165, 'timestamp': '2025-09-30 22:46:49.045983', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.113504', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.07487212121486664, 'timestamp': '2025-09-30 22:46:49.118040', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:49.190664', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.045461270958185196, 'timestamp': '2025-09-30 22:46:49.195407', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.256810', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.10070069134235382, 'timestamp': '2025-09-30 22:46:49.265750', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:49.324604', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.12737120687961578, 'timestamp': '2025-09-30 22:46:49.332422', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.394240', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.07742056995630264, 'timestamp': '2025-09-30 22:46:49.399841', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:49.460122', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.07021763920783997, 'timestamp': '2025-09-30 22:46:49.475264', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:49.548990', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.09384707361459732, 'timestamp': '2025-09-30 22:46:49.554637', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:49.616769', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.029511133208870888, 'timestamp': '2025-09-30 22:46:49.625600', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.692794', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.10124734044075012, 'timestamp': '2025-09-30 22:46:49.698673', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.792486', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.09372090548276901, 'timestamp': '2025-09-30 22:46:49.797841', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:49.867159', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.04650186374783516, 'timestamp': '2025-09-30 22:46:49.881503', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:49.951854', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.08994001150131226, 'timestamp': '2025-09-30 22:46:49.969676', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:50.047129', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.10542023926973343, 'timestamp': '2025-09-30 22:46:50.061354', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:50.126302', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.14323057234287262, 'timestamp': '2025-09-30 22:46:50.129258', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:50.188213', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.12690068781375885, 'timestamp': '2025-09-30 22:46:50.194393', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:50.255151', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.13885359466075897, 'timestamp': '2025-09-30 22:46:50.263972', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:50.324812', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.041171468794345856, 'timestamp': '2025-09-30 22:46:50.339981', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:50.403293', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.03011602722108364, 'timestamp': '2025-09-30 22:46:50.411963', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:50.474394', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.029829736799001694, 'timestamp': '2025-09-30 22:46:50.480068', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:50.556129', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.08194577693939209, 'timestamp': '2025-09-30 22:46:50.564277', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:50.630651', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.0821155309677124, 'timestamp': '2025-09-30 22:46:50.639844', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:50.701571', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.1123664379119873, 'timestamp': '2025-09-30 22:46:50.717232', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:50.775899', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.08981465548276901, 'timestamp': '2025-09-30 22:46:50.791260', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:50.850769', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.1156301498413086, 'timestamp': '2025-09-30 22:46:50.859065', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:50.930267', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.09240931272506714, 'timestamp': '2025-09-30 22:46:50.935190', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:51.006262', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.06151347607374191, 'timestamp': '2025-09-30 22:46:51.011215', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.074850', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.14112301170825958, 'timestamp': '2025-09-30 22:46:51.079752', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:51.138078', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.10444696247577667, 'timestamp': '2025-09-30 22:46:51.147029', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.205869', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.04528393968939781, 'timestamp': '2025-09-30 22:46:51.210475', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:51.273988', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.07144323736429214, 'timestamp': '2025-09-30 22:46:51.278431', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:51.341794', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.10669770836830139, 'timestamp': '2025-09-30 22:46:51.345205', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:51.403497', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.13028205931186676, 'timestamp': '2025-09-30 22:46:51.419197', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.479250', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.13723808526992798, 'timestamp': '2025-09-30 22:46:51.489116', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.548149', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.03477370738983154, 'timestamp': '2025-09-30 22:46:51.552180', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:51.613536', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.07445055991411209, 'timestamp': '2025-09-30 22:46:51.617255', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.676901', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.13508272171020508, 'timestamp': '2025-09-30 22:46:51.685871', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.749678', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.06109939515590668, 'timestamp': '2025-09-30 22:46:51.753416', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.814204', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.03350110724568367, 'timestamp': '2025-09-30 22:46:51.818024', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:51.875477', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.11294818669557571, 'timestamp': '2025-09-30 22:46:51.880269', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:51.940987', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.044314898550510406, 'timestamp': '2025-09-30 22:46:51.949557', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.009320', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.08199166506528854, 'timestamp': '2025-09-30 22:46:52.020927', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:52.080412', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.07023574411869049, 'timestamp': '2025-09-30 22:46:52.083767', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:52.153079', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.06602336466312408, 'timestamp': '2025-09-30 22:46:52.156323', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:52.219884', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.05250369384884834, 'timestamp': '2025-09-30 22:46:52.227614', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.296431', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.0567064993083477, 'timestamp': '2025-09-30 22:46:52.302972', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.364450', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.057907573878765106, 'timestamp': '2025-09-30 22:46:52.370400', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.428769', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.08976787328720093, 'timestamp': '2025-09-30 22:46:52.434743', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.494256', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.08549374341964722, 'timestamp': '2025-09-30 22:46:52.513580', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.575699', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.1236259862780571, 'timestamp': '2025-09-30 22:46:52.578956', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:52.652954', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.049854081124067307, 'timestamp': '2025-09-30 22:46:52.658124', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:52.720951', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.09884529560804367, 'timestamp': '2025-09-30 22:46:52.726350', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:52.793819', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.07059097290039062, 'timestamp': '2025-09-30 22:46:52.802047', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:52.861557', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.09804920852184296, 'timestamp': '2025-09-30 22:46:52.874137', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:46:52.946574', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.06492728739976883, 'timestamp': '2025-09-30 22:46:52.952267', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:53.014697', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.07156950980424881, 'timestamp': '2025-09-30 22:46:53.018861', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:46:53.080613', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.04798807203769684, 'timestamp': '2025-09-30 22:46:53.090019', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:53.172449', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.0682886615395546, 'timestamp': '2025-09-30 22:46:53.180788', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:53.243076', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.08456805348396301, 'timestamp': '2025-09-30 22:46:53.248049', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:53.308678', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.06351565569639206, 'timestamp': '2025-09-30 22:46:53.314243', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:53.403226', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.07502807676792145, 'timestamp': '2025-09-30 22:46:53.412401', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:53.473211', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.12908530235290527, 'timestamp': '2025-09-30 22:46:53.478409', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:53.536591', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.0448078028857708, 'timestamp': '2025-09-30 22:46:53.544154', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:53.606160', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.07383174449205399, 'timestamp': '2025-09-30 22:46:53.612492', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:53.671316', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.055318668484687805, 'timestamp': '2025-09-30 22:46:53.680916', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:53.747776', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.06597660481929779, 'timestamp': '2025-09-30 22:46:53.752551', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:53.811725', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.11515859514474869, 'timestamp': '2025-09-30 22:46:53.816376', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:53.877959', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.09583257138729095, 'timestamp': '2025-09-30 22:46:53.883094', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:53.954924', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.15958312153816223, 'timestamp': '2025-09-30 22:46:53.962716', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:54.021602', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.1353621780872345, 'timestamp': '2025-09-30 22:46:54.038526', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:46:54.100256', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.12318704277276993, 'timestamp': '2025-09-30 22:46:54.104574', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.175681', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.09720047563314438, 'timestamp': '2025-09-30 22:46:54.180578', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:54.257588', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.09684900939464569, 'timestamp': '2025-09-30 22:46:54.265299', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.332476', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.041789710521698, 'timestamp': '2025-09-30 22:46:54.336928', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.415308', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.08456025272607803, 'timestamp': '2025-09-30 22:46:54.421372', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:54.488713', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.13644231855869293, 'timestamp': '2025-09-30 22:46:54.494493', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:54.554972', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.10448141396045685, 'timestamp': '2025-09-30 22:46:54.572194', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:46:54.630998', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.15176838636398315, 'timestamp': '2025-09-30 22:46:54.635749', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.694906', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.07666277885437012, 'timestamp': '2025-09-30 22:46:54.708776', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.768781', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.09039906412363052, 'timestamp': '2025-09-30 22:46:54.783782', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.843341', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.11239226162433624, 'timestamp': '2025-09-30 22:46:54.851045', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:54.913796', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.03874361142516136, 'timestamp': '2025-09-30 22:46:54.927914', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:54.991502', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.13675731420516968, 'timestamp': '2025-09-30 22:46:54.996218', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:46:55.055195', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.09900099039077759, 'timestamp': '2025-09-30 22:46:55.064036', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:46:55.137759', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.14720453321933746, 'timestamp': '2025-09-30 22:46:55.146081', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:47:11.032792', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 7805.512516787449, 'timestamp': '2025-09-30 22:47:11.038037', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:11.099724', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.07903943210840225, 'timestamp': '2025-09-30 22:47:11.108320', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.169151', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.1132875606417656, 'timestamp': '2025-09-30 22:47:11.172544', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:11.248060', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.1343008130788803, 'timestamp': '2025-09-30 22:47:11.256686', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.322785', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.04455108195543289, 'timestamp': '2025-09-30 22:47:11.330429', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.397476', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.06182340532541275, 'timestamp': '2025-09-30 22:47:11.400760', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:11.470199', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.07724389433860779, 'timestamp': '2025-09-30 22:47:11.474152', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:11.543405', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.09383030980825424, 'timestamp': '2025-09-30 22:47:11.547451', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.608418', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.07522061467170715, 'timestamp': '2025-09-30 22:47:11.617827', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.677634', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.06640074402093887, 'timestamp': '2025-09-30 22:47:11.682816', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.746595', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.07663335651159286, 'timestamp': '2025-09-30 22:47:11.749634', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:11.809136', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.1600949615240097, 'timestamp': '2025-09-30 22:47:11.812377', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:11.878320', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.05412563681602478, 'timestamp': '2025-09-30 22:47:11.884980', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:11.947035', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.09011015295982361, 'timestamp': '2025-09-30 22:47:11.951459', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.011139', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.16520646214485168, 'timestamp': '2025-09-30 22:47:12.015616', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:12.074803', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.10481410473585129, 'timestamp': '2025-09-30 22:47:12.086842', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:12.189715', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.0703621357679367, 'timestamp': '2025-09-30 22:47:12.197701', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:12.258574', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.0921388491988182, 'timestamp': '2025-09-30 22:47:12.262615', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.324765', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.09566959738731384, 'timestamp': '2025-09-30 22:47:12.340002', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:12.399755', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.014768210239708424, 'timestamp': '2025-09-30 22:47:12.404400', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.464483', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.03481093421578407, 'timestamp': '2025-09-30 22:47:12.471987', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:12.532846', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.023454107344150543, 'timestamp': '2025-09-30 22:47:12.538810', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.620788', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.10922566801309586, 'timestamp': '2025-09-30 22:47:12.630958', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.689979', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.092146135866642, 'timestamp': '2025-09-30 22:47:12.694358', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.757283', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.07225436717271805, 'timestamp': '2025-09-30 22:47:12.773993', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:12.850391', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.07270187884569168, 'timestamp': '2025-09-30 22:47:12.854007', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:12.920174', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.0754389688372612, 'timestamp': '2025-09-30 22:47:12.932255', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:12.993515', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.08782213181257248, 'timestamp': '2025-09-30 22:47:12.997979', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:13.065123', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.06720534712076187, 'timestamp': '2025-09-30 22:47:13.083474', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:13.156993', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.03845912590622902, 'timestamp': '2025-09-30 22:47:13.160854', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:13.229574', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.044885072857141495, 'timestamp': '2025-09-30 22:47:13.233756', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:13.306357', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.02691691182553768, 'timestamp': '2025-09-30 22:47:13.311506', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:13.391690', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.0878695696592331, 'timestamp': '2025-09-30 22:47:13.399439', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:13.465639', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.11012037098407745, 'timestamp': '2025-09-30 22:47:13.469390', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:13.528291', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.0920858159661293, 'timestamp': '2025-09-30 22:47:13.533272', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:13.594076', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.12139803171157837, 'timestamp': '2025-09-30 22:47:13.599632', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:13.658120', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.09248286485671997, 'timestamp': '2025-09-30 22:47:13.666196', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:13.743610', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.10643140971660614, 'timestamp': '2025-09-30 22:47:13.748848', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:13.815191', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.01684282347559929, 'timestamp': '2025-09-30 22:47:13.820238', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:13.883997', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.0810881182551384, 'timestamp': '2025-09-30 22:47:13.890004', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:13.950250', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.1924554854631424, 'timestamp': '2025-09-30 22:47:13.958663', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:14.018255', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.10583487153053284, 'timestamp': '2025-09-30 22:47:14.032503', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:47:14.108567', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.07742371410131454, 'timestamp': '2025-09-30 22:47:14.114160', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:14.181185', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.09494040161371231, 'timestamp': '2025-09-30 22:47:14.185638', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.266988', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.0917520597577095, 'timestamp': '2025-09-30 22:47:14.277619', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.337023', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.09670910239219666, 'timestamp': '2025-09-30 22:47:14.348597', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.415121', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.09043288230895996, 'timestamp': '2025-09-30 22:47:14.422451', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.485328', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.11923801898956299, 'timestamp': '2025-09-30 22:47:14.489848', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:14.548372', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.11389359086751938, 'timestamp': '2025-09-30 22:47:14.556425', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:14.620605', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.07477669417858124, 'timestamp': '2025-09-30 22:47:14.625041', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.685099', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.0884595662355423, 'timestamp': '2025-09-30 22:47:14.700895', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.762417', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.07105221599340439, 'timestamp': '2025-09-30 22:47:14.767087', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.838106', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.07303681969642639, 'timestamp': '2025-09-30 22:47:14.856712', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:14.915665', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.12062123417854309, 'timestamp': '2025-09-30 22:47:14.919662', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:14.979869', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.12229540199041367, 'timestamp': '2025-09-30 22:47:14.996467', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:15.060518', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.08546171337366104, 'timestamp': '2025-09-30 22:47:15.064762', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:15.127332', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.08646797388792038, 'timestamp': '2025-09-30 22:47:15.136575', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:15.209322', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.044999849051237106, 'timestamp': '2025-09-30 22:47:15.214302', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:15.286163', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.07150448113679886, 'timestamp': '2025-09-30 22:47:15.290955', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:15.351975', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.12043905258178711, 'timestamp': '2025-09-30 22:47:15.356739', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:15.418070', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.06706587225198746, 'timestamp': '2025-09-30 22:47:15.430883', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:15.493581', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.1135602593421936, 'timestamp': '2025-09-30 22:47:15.501879', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:15.572313', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.10061665624380112, 'timestamp': '2025-09-30 22:47:15.577403', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:15.642002', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.06903646141290665, 'timestamp': '2025-09-30 22:47:15.647593', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:15.709068', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.11810542643070221, 'timestamp': '2025-09-30 22:47:15.718367', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-09-30 22:47:16.168834', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:16.238581', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.14093971252441406, 'timestamp': '2025-09-30 22:47:16.242653', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:16.309541', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.05348671227693558, 'timestamp': '2025-09-30 22:47:16.314745', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:16.407223', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.04067476466298103, 'timestamp': '2025-09-30 22:47:16.411988', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:16.483164', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.09505479037761688, 'timestamp': '2025-09-30 22:47:16.491862', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:16.571192', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.15879571437835693, 'timestamp': '2025-09-30 22:47:16.584779', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:16.649106', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.04028700292110443, 'timestamp': '2025-09-30 22:47:16.654161', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:16.716122', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.09408414363861084, 'timestamp': '2025-09-30 22:47:16.722865', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:16.789903', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.09727594256401062, 'timestamp': '2025-09-30 22:47:16.797654', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:16.864733', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.11179149895906448, 'timestamp': '2025-09-30 22:47:16.868818', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:16.930478', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.062443505972623825, 'timestamp': '2025-09-30 22:47:16.934418', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:16.992150', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.09007072448730469, 'timestamp': '2025-09-30 22:47:16.997112', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:17.058755', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.13451023399829865, 'timestamp': '2025-09-30 22:47:17.070235', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:17.158117', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.06006409972906113, 'timestamp': '2025-09-30 22:47:17.163169', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:17.221828', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.0949639081954956, 'timestamp': '2025-09-30 22:47:17.234967', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:17.309242', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.07519765943288803, 'timestamp': '2025-09-30 22:47:17.313655', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:17.374095', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.06719440966844559, 'timestamp': '2025-09-30 22:47:17.381953', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:17.463597', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.09098298847675323, 'timestamp': '2025-09-30 22:47:17.468667', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:17.527937', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.026143822818994522, 'timestamp': '2025-09-30 22:47:17.531450', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:17.592705', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.04171743616461754, 'timestamp': '2025-09-30 22:47:17.596113', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:17.672203', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.04941720888018608, 'timestamp': '2025-09-30 22:47:17.687761', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:17.748249', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.047324731945991516, 'timestamp': '2025-09-30 22:47:17.761324', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:17.829967', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.09508787095546722, 'timestamp': '2025-09-30 22:47:17.834776', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:17.900232', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.10098431259393692, 'timestamp': '2025-09-30 22:47:17.903460', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:17.972141', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.14787745475769043, 'timestamp': '2025-09-30 22:47:17.980098', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:18.040775', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.05350848287343979, 'timestamp': '2025-09-30 22:47:18.050741', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.125975', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.05167340859770775, 'timestamp': '2025-09-30 22:47:18.130881', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:18.191546', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.0805615782737732, 'timestamp': '2025-09-30 22:47:18.195253', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.267983', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.07547671347856522, 'timestamp': '2025-09-30 22:47:18.276499', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:18.334810', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.05004702880978584, 'timestamp': '2025-09-30 22:47:18.338326', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:18.396981', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.181735560297966, 'timestamp': '2025-09-30 22:47:18.402185', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:18.464298', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.07532420009374619, 'timestamp': '2025-09-30 22:47:18.469245', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.529066', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.061288878321647644, 'timestamp': '2025-09-30 22:47:18.543371', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.604065', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.06577564775943756, 'timestamp': '2025-09-30 22:47:18.608395', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.702088', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.07150238007307053, 'timestamp': '2025-09-30 22:47:18.707131', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.767613', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.03978580981492996, 'timestamp': '2025-09-30 22:47:18.783394', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:18.844627', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.10690921545028687, 'timestamp': '2025-09-30 22:47:18.852959', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:18.912266', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.057113442569971085, 'timestamp': '2025-09-30 22:47:18.928878', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:18.993248', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.012980575673282146, 'timestamp': '2025-09-30 22:47:19.003127', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.074469', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.05989610031247139, 'timestamp': '2025-09-30 22:47:19.083772', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.162135', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.04278760030865669, 'timestamp': '2025-09-30 22:47:19.174013', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:19.242408', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.07285553216934204, 'timestamp': '2025-09-30 22:47:19.246459', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:19.316687', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.1350342482328415, 'timestamp': '2025-09-30 22:47:19.320502', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.386498', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.135152667760849, 'timestamp': '2025-09-30 22:47:19.390368', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.450192', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.04028685390949249, 'timestamp': '2025-09-30 22:47:19.469438', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:19.531766', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.12374041229486465, 'timestamp': '2025-09-30 22:47:19.542476', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.603158', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.07698850333690643, 'timestamp': '2025-09-30 22:47:19.607012', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:19.667951', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.04972871392965317, 'timestamp': '2025-09-30 22:47:19.673749', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:19.733723', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.064658023416996, 'timestamp': '2025-09-30 22:47:19.744903', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.803957', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.17199349403381348, 'timestamp': '2025-09-30 22:47:19.816443', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:19.877579', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.07628665864467621, 'timestamp': '2025-09-30 22:47:19.882185', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:19.942112', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.08736875653266907, 'timestamp': '2025-09-30 22:47:19.953498', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:20.011810', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.0525604747235775, 'timestamp': '2025-09-30 22:47:20.027902', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:20.093247', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.05481771007180214, 'timestamp': '2025-09-30 22:47:20.096534', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:20.181507', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.06571417301893234, 'timestamp': '2025-09-30 22:47:20.186014', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:20.252133', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.10685493797063828, 'timestamp': '2025-09-30 22:47:20.256315', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:20.314967', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.05852297320961952, 'timestamp': '2025-09-30 22:47:20.326540', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:20.391127', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.014210567809641361, 'timestamp': '2025-09-30 22:47:20.395066', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:20.454847', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.07248901575803757, 'timestamp': '2025-09-30 22:47:20.461879', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:20.520807', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.09884025901556015, 'timestamp': '2025-09-30 22:47:20.523919', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:20.587770', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.13525548577308655, 'timestamp': '2025-09-30 22:47:20.595546', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:20.659358', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.10545604676008224, 'timestamp': '2025-09-30 22:47:20.663477', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:20.724744', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.04250357300043106, 'timestamp': '2025-09-30 22:47:20.729886', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:47:20.804374', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.14071279764175415, 'timestamp': '2025-09-30 22:47:20.808243', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:20.868975', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.13154463469982147, 'timestamp': '2025-09-30 22:47:20.878646', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:20.939974', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.047849856317043304, 'timestamp': '2025-09-30 22:47:20.944332', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:21.030581', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.029753562062978745, 'timestamp': '2025-09-30 22:47:21.033889', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:21.094886', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.037622496485710144, 'timestamp': '2025-09-30 22:47:21.099858', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:21.163644', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.06430954486131668, 'timestamp': '2025-09-30 22:47:21.171713', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:21.255176', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.12340464442968369, 'timestamp': '2025-09-30 22:47:21.269896', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:21.332794', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.0716543048620224, 'timestamp': '2025-09-30 22:47:21.337531', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:21.396985', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.029396452009677887, 'timestamp': '2025-09-30 22:47:21.407030', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:21.466092', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.103460393846035, 'timestamp': '2025-09-30 22:47:21.474499', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:21.539902', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.0785389319062233, 'timestamp': '2025-09-30 22:47:21.545524', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:21.617879', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.062557153403759, 'timestamp': '2025-09-30 22:47:21.622078', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:21.692424', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.13554711639881134, 'timestamp': '2025-09-30 22:47:21.697510', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:21.758692', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.03950805589556694, 'timestamp': '2025-09-30 22:47:21.766874', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:21.841481', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.06071242690086365, 'timestamp': '2025-09-30 22:47:21.847041', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:21.916219', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.14250393211841583, 'timestamp': '2025-09-30 22:47:21.920163', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:21.990909', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.04987604171037674, 'timestamp': '2025-09-30 22:47:21.996383', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:22.056422', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.08565039187669754, 'timestamp': '2025-09-30 22:47:22.065128', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:22.124546', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.02975388616323471, 'timestamp': '2025-09-30 22:47:22.130880', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:22.197071', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.03369096666574478, 'timestamp': '2025-09-30 22:47:22.204808', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:22.266726', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.05073116347193718, 'timestamp': '2025-09-30 22:47:22.272410', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:22.334028', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.07196389883756638, 'timestamp': '2025-09-30 22:47:22.343208', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:22.404200', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.043336573988199234, 'timestamp': '2025-09-30 22:47:22.414653', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:22.484619', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.04190748929977417, 'timestamp': '2025-09-30 22:47:22.489263', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:22.578906', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.22607624530792236, 'timestamp': '2025-09-30 22:47:22.582738', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:22.643301', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.0895984098315239, 'timestamp': '2025-09-30 22:47:22.658005', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:22.720735', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.05599750950932503, 'timestamp': '2025-09-30 22:47:22.725012', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:22.792585', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.06261524558067322, 'timestamp': '2025-09-30 22:47:22.798178', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:22.856823', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.00798454787582159, 'timestamp': '2025-09-30 22:47:22.862440', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:22.937608', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.028707582503557205, 'timestamp': '2025-09-30 22:47:22.947376', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:23.006677', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.06636229902505875, 'timestamp': '2025-09-30 22:47:23.011944', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:23.083962', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.1286882758140564, 'timestamp': '2025-09-30 22:47:23.088091', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:23.223340', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.1186324805021286, 'timestamp': '2025-09-30 22:47:23.227468', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:23.328261', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.03908449411392212, 'timestamp': '2025-09-30 22:47:23.337406', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:23.497338', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.056796398013830185, 'timestamp': '2025-09-30 22:47:23.502050', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:23.603592', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.06564002484083176, 'timestamp': '2025-09-30 22:47:23.607606', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:23.701100', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.16393719613552094, 'timestamp': '2025-09-30 22:47:23.705015', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:23.810648', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.05423717200756073, 'timestamp': '2025-09-30 22:47:23.817695', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:23.902576', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.03500925004482269, 'timestamp': '2025-09-30 22:47:23.907078', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:23.992203', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.05704863741993904, 'timestamp': '2025-09-30 22:47:23.997142', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:24.087424', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.1096576452255249, 'timestamp': '2025-09-30 22:47:24.092658', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:24.170281', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.1116526797413826, 'timestamp': '2025-09-30 22:47:24.179015', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:24.262227', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.0906779021024704, 'timestamp': '2025-09-30 22:47:24.267470', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.370644', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.033092912286520004, 'timestamp': '2025-09-30 22:47:24.379073', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.459974', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.031929317861795425, 'timestamp': '2025-09-30 22:47:24.466101', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.542320', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.06333224475383759, 'timestamp': '2025-09-30 22:47:24.550147', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.611397', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.18424740433692932, 'timestamp': '2025-09-30 22:47:24.615679', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:24.674619', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.13933783769607544, 'timestamp': '2025-09-30 22:47:24.679203', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:24.741136', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.09472498297691345, 'timestamp': '2025-09-30 22:47:24.753541', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.815327', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.04713461920619011, 'timestamp': '2025-09-30 22:47:24.824267', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:24.896653', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.1002124696969986, 'timestamp': '2025-09-30 22:47:24.900570', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:24.959798', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.08541702479124069, 'timestamp': '2025-09-30 22:47:24.964731', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:25.028063', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.08570270240306854, 'timestamp': '2025-09-30 22:47:25.032586', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:25.090790', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.08275310695171356, 'timestamp': '2025-09-30 22:47:25.099096', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:25.168186', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.09566270560026169, 'timestamp': '2025-09-30 22:47:25.175237', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:25.236096', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.09186413139104843, 'timestamp': '2025-09-30 22:47:25.239743', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:25.297802', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.07657519727945328, 'timestamp': '2025-09-30 22:47:25.300738', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:25.357301', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.05573433265089989, 'timestamp': '2025-09-30 22:47:25.371777', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:25.430241', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.09714575856924057, 'timestamp': '2025-09-30 22:47:25.434546', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:25.495131', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.10891251266002655, 'timestamp': '2025-09-30 22:47:25.498878', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:25.565362', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.04400751367211342, 'timestamp': '2025-09-30 22:47:25.576717', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:25.637692', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.07026698440313339, 'timestamp': '2025-09-30 22:47:25.644593', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:25.704055', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.12301875650882721, 'timestamp': '2025-09-30 22:47:25.714199', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:47:25.782075', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.05981777608394623, 'timestamp': '2025-09-30 22:47:25.787200', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:25.856830', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.06762979179620743, 'timestamp': '2025-09-30 22:47:25.864959', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:25.923637', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.09097583591938019, 'timestamp': '2025-09-30 22:47:25.940987', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:26.006827', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.017532356083393097, 'timestamp': '2025-09-30 22:47:26.011146', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:26.072921', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.12188000977039337, 'timestamp': '2025-09-30 22:47:26.076958', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:26.141104', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.20024020969867706, 'timestamp': '2025-09-30 22:47:26.144863', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:26.222741', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.12355953454971313, 'timestamp': '2025-09-30 22:47:26.231094', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:26.294341', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.058315251022577286, 'timestamp': '2025-09-30 22:47:26.303558', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:26.360542', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.08774030953645706, 'timestamp': '2025-09-30 22:47:26.366256', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:26.435186', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.06628359854221344, 'timestamp': '2025-09-30 22:47:26.447289', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:26.513517', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.10947248339653015, 'timestamp': '2025-09-30 22:47:26.522342', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 4480027263872.0}, 'timestamp': '2025-09-30 22:47:26.580164', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.10516464710235596, 'timestamp': '2025-09-30 22:47:26.585005', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:26.655716', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.0970761701464653, 'timestamp': '2025-09-30 22:47:26.660048', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:26.730208', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.10367994010448456, 'timestamp': '2025-09-30 22:47:26.745874', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:26.815845', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.06364801526069641, 'timestamp': '2025-09-30 22:47:26.825078', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:26.885547', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.0943518802523613, 'timestamp': '2025-09-30 22:47:26.898527', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:26.967043', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.10502777993679047, 'timestamp': '2025-09-30 22:47:26.970444', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:27.038313', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.10676219314336777, 'timestamp': '2025-09-30 22:47:27.042775', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.112837', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.07374270260334015, 'timestamp': '2025-09-30 22:47:27.120186', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.198508', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.03892382234334946, 'timestamp': '2025-09-30 22:47:27.206416', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:27.265507', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.04612914100289345, 'timestamp': '2025-09-30 22:47:27.269534', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:27.334889', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.06987596303224564, 'timestamp': '2025-09-30 22:47:27.341874', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.414671', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.0365200974047184, 'timestamp': '2025-09-30 22:47:27.425442', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:27.485203', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.12249890714883804, 'timestamp': '2025-09-30 22:47:27.489215', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.557751', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.14132608473300934, 'timestamp': '2025-09-30 22:47:27.567412', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:27.634810', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.11717738211154938, 'timestamp': '2025-09-30 22:47:27.639342', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.709913', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.08064141869544983, 'timestamp': '2025-09-30 22:47:27.727897', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:27.785200', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.06017077714204788, 'timestamp': '2025-09-30 22:47:27.790133', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:27.849430', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.08828193694353104, 'timestamp': '2025-09-30 22:47:27.852996', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:27.920510', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.026772338896989822, 'timestamp': '2025-09-30 22:47:27.924000', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:27.982764', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.10591389238834381, 'timestamp': '2025-09-30 22:47:27.990216', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.053921', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.09347141534090042, 'timestamp': '2025-09-30 22:47:28.057710', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.119524', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.08971798419952393, 'timestamp': '2025-09-30 22:47:28.130278', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:28.196106', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.08100976794958115, 'timestamp': '2025-09-30 22:47:28.201367', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.268214', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.06874555349349976, 'timestamp': '2025-09-30 22:47:28.274949', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.334178', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.1308513581752777, 'timestamp': '2025-09-30 22:47:28.337252', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:28.406520', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.051532164216041565, 'timestamp': '2025-09-30 22:47:28.419931', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.485090', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.09497006237506866, 'timestamp': '2025-09-30 22:47:28.491862', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.564545', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.07756569981575012, 'timestamp': '2025-09-30 22:47:28.582699', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.644096', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.05620535835623741, 'timestamp': '2025-09-30 22:47:28.649352', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.708112', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.07373616844415665, 'timestamp': '2025-09-30 22:47:28.715820', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.774474', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.13720187544822693, 'timestamp': '2025-09-30 22:47:28.778231', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.844310', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.10650744289159775, 'timestamp': '2025-09-30 22:47:28.852298', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:28.916341', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.023717932403087616, 'timestamp': '2025-09-30 22:47:28.919398', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:28.980430', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.14435775578022003, 'timestamp': '2025-09-30 22:47:28.985121', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:29.044724', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.057175375521183014, 'timestamp': '2025-09-30 22:47:29.047778', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:29.121506', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.12411735951900482, 'timestamp': '2025-09-30 22:47:29.160168', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:29.234789', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.1272408664226532, 'timestamp': '2025-09-30 22:47:29.239536', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:29.307776', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.05644267797470093, 'timestamp': '2025-09-30 22:47:29.313819', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:29.386333', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.09369482100009918, 'timestamp': '2025-09-30 22:47:29.401609', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:29.472250', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.12411196529865265, 'timestamp': '2025-09-30 22:47:29.480561', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:29.548791', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.07262663543224335, 'timestamp': '2025-09-30 22:47:29.554918', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:29.626344', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.02066478691995144, 'timestamp': '2025-09-30 22:47:29.632439', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:29.695470', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.0721062645316124, 'timestamp': '2025-09-30 22:47:29.699628', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:29.787601', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.06776247173547745, 'timestamp': '2025-09-30 22:47:29.796145', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:29.854110', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.13884149491786957, 'timestamp': '2025-09-30 22:47:29.857639', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:29.923461', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.11733831465244293, 'timestamp': '2025-09-30 22:47:29.928087', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:29.990617', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.04746289923787117, 'timestamp': '2025-09-30 22:47:30.004145', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.064553', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.07498498260974884, 'timestamp': '2025-09-30 22:47:30.073887', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.135290', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.12082839012145996, 'timestamp': '2025-09-30 22:47:30.147764', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:30.208103', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.10816925019025803, 'timestamp': '2025-09-30 22:47:30.212779', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.273408', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.07107976824045181, 'timestamp': '2025-09-30 22:47:30.280680', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.342155', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.13026435673236847, 'timestamp': '2025-09-30 22:47:30.350500', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.422684', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.03643914312124252, 'timestamp': '2025-09-30 22:47:30.427532', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:30.488587', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.15614864230155945, 'timestamp': '2025-09-30 22:47:30.504692', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:30.570581', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.10306907445192337, 'timestamp': '2025-09-30 22:47:30.576798', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:30.637679', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.13459689915180206, 'timestamp': '2025-09-30 22:47:30.647955', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:30.732304', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.03528745099902153, 'timestamp': '2025-09-30 22:47:30.738295', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:30.799338', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.10266727209091187, 'timestamp': '2025-09-30 22:47:30.810321', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:30.873797', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.03999579697847366, 'timestamp': '2025-09-30 22:47:30.879187', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:30.938719', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.05419539660215378, 'timestamp': '2025-09-30 22:47:30.947422', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.006804', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.04004605859518051, 'timestamp': '2025-09-30 22:47:31.011548', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.069638', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.07975929975509644, 'timestamp': '2025-09-30 22:47:31.073630', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:31.135062', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.08871181309223175, 'timestamp': '2025-09-30 22:47:31.140962', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.199397', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.10005330294370651, 'timestamp': '2025-09-30 22:47:31.211865', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:31.285799', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.02236928418278694, 'timestamp': '2025-09-30 22:47:31.290115', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:31.351577', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.04528385400772095, 'timestamp': '2025-09-30 22:47:31.355044', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:31.427978', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.14808158576488495, 'timestamp': '2025-09-30 22:47:31.432255', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.492154', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.08494158089160919, 'timestamp': '2025-09-30 22:47:31.500237', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:31.563721', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.08120641112327576, 'timestamp': '2025-09-30 22:47:31.569111', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:31.639257', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.07726454734802246, 'timestamp': '2025-09-30 22:47:31.644921', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:31.721998', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.11700518429279327, 'timestamp': '2025-09-30 22:47:31.726403', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.798230', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.05930013582110405, 'timestamp': '2025-09-30 22:47:31.806199', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:31.865618', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.07590333372354507, 'timestamp': '2025-09-30 22:47:31.869477', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.930305', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.0741553008556366, 'timestamp': '2025-09-30 22:47:31.933655', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:31.995377', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.1080462709069252, 'timestamp': '2025-09-30 22:47:32.002329', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:32.062756', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.048125870525836945, 'timestamp': '2025-09-30 22:47:32.071622', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:32.129993', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.07123810797929764, 'timestamp': '2025-09-30 22:47:32.150228', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:32.220090', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.09409615397453308, 'timestamp': '2025-09-30 22:47:32.224438', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:32.295364', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.04883173853158951, 'timestamp': '2025-09-30 22:47:32.298661', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.359974', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.06865983456373215, 'timestamp': '2025-09-30 22:47:32.374253', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:32.431634', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.10723058879375458, 'timestamp': '2025-09-30 22:47:32.436112', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.501578', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.13109935820102692, 'timestamp': '2025-09-30 22:47:32.506740', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.565002', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.09737933427095413, 'timestamp': '2025-09-30 22:47:32.568718', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:32.627239', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.10578132420778275, 'timestamp': '2025-09-30 22:47:32.635555', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.695813', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.11649707704782486, 'timestamp': '2025-09-30 22:47:32.706994', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:32.766109', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.1623925119638443, 'timestamp': '2025-09-30 22:47:32.771985', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.830154', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.1408056914806366, 'timestamp': '2025-09-30 22:47:32.834659', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:32.893996', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.06625601649284363, 'timestamp': '2025-09-30 22:47:32.901170', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:32.960262', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.13879428803920746, 'timestamp': '2025-09-30 22:47:32.964277', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:33.023848', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.0939812883734703, 'timestamp': '2025-09-30 22:47:33.027728', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.093739', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.06485215574502945, 'timestamp': '2025-09-30 22:47:33.098214', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:33.155982', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.1066603809595108, 'timestamp': '2025-09-30 22:47:33.163692', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:33.225967', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.08765291422605515, 'timestamp': '2025-09-30 22:47:33.229131', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:33.289501', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.09348997473716736, 'timestamp': '2025-09-30 22:47:33.294624', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:33.353298', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.07669313251972198, 'timestamp': '2025-09-30 22:47:33.357540', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:33.418112', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.10540615767240524, 'timestamp': '2025-09-30 22:47:33.435284', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.498941', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.012531230226159096, 'timestamp': '2025-09-30 22:47:33.502221', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.560109', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.030505431815981865, 'timestamp': '2025-09-30 22:47:33.564812', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.622674', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.05226032808423042, 'timestamp': '2025-09-30 22:47:33.626462', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.687533', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.057530321180820465, 'timestamp': '2025-09-30 22:47:33.702801', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.761863', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.05061899498105049, 'timestamp': '2025-09-30 22:47:33.765469', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.828324', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.068824902176857, 'timestamp': '2025-09-30 22:47:33.841578', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:33.904455', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.10510332137346268, 'timestamp': '2025-09-30 22:47:33.908755', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:33.993307', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.039571214467287064, 'timestamp': '2025-09-30 22:47:34.000460', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:34.059049', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.09859185665845871, 'timestamp': '2025-09-30 22:47:34.064244', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:34.137069', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.06453485041856766, 'timestamp': '2025-09-30 22:47:34.141819', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:34.211320', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.09490444511175156, 'timestamp': '2025-09-30 22:47:34.214957', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:34.272835', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.10215003788471222, 'timestamp': '2025-09-30 22:47:34.280414', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:34.347199', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.07303785532712936, 'timestamp': '2025-09-30 22:47:34.350901', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:34.417055', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.12115665525197983, 'timestamp': '2025-09-30 22:47:34.428181', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:34.496777', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.13584376871585846, 'timestamp': '2025-09-30 22:47:34.500588', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:34.557952', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.055184509605169296, 'timestamp': '2025-09-30 22:47:34.564868', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:34.622112', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.09510350972414017, 'timestamp': '2025-09-30 22:47:34.627809', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:34.694581', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.07330194860696793, 'timestamp': '2025-09-30 22:47:34.699913', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:34.783973', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.08974158763885498, 'timestamp': '2025-09-30 22:47:34.799279', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:34.858710', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.04737962409853935, 'timestamp': '2025-09-30 22:47:34.875582', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:34.935706', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.13432753086090088, 'timestamp': '2025-09-30 22:47:34.939274', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:34.999515', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.08286035060882568, 'timestamp': '2025-09-30 22:47:35.005500', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:35.068254', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.04931848868727684, 'timestamp': '2025-09-30 22:47:35.074635', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:35.134866', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.07201699167490005, 'timestamp': '2025-09-30 22:47:35.144093', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:35.211048', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.13232778012752533, 'timestamp': '2025-09-30 22:47:35.214634', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:35.276531', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.10459301620721817, 'timestamp': '2025-09-30 22:47:35.280511', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:35.352880', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.12272582203149796, 'timestamp': '2025-09-30 22:47:35.357497', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:35.423358', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.08369174599647522, 'timestamp': '2025-09-30 22:47:35.432036', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:35.492628', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.05796587094664574, 'timestamp': '2025-09-30 22:47:35.497115', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:35.570656', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.07209835201501846, 'timestamp': '2025-09-30 22:47:35.575561', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:35.633897', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.07940119504928589, 'timestamp': '2025-09-30 22:47:35.638742', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:35.698331', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.07849342375993729, 'timestamp': '2025-09-30 22:47:35.708344', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:35.767718', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.08668415248394012, 'timestamp': '2025-09-30 22:47:35.773115', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:35.832305', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.07247880846261978, 'timestamp': '2025-09-30 22:47:35.843920', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:35.916828', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.048627469688653946, 'timestamp': '2025-09-30 22:47:35.921005', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:35.989256', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.06332338601350784, 'timestamp': '2025-09-30 22:47:35.996448', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.065764', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.08814442902803421, 'timestamp': '2025-09-30 22:47:36.070092', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:36.131874', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.04033546894788742, 'timestamp': '2025-09-30 22:47:36.136835', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.196554', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.03221329674124718, 'timestamp': '2025-09-30 22:47:36.211334', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.276550', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.03740253672003746, 'timestamp': '2025-09-30 22:47:36.283851', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.348835', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.08756878972053528, 'timestamp': '2025-09-30 22:47:36.353202', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:36.424579', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.0880182757973671, 'timestamp': '2025-09-30 22:47:36.429471', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.502237', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.06629787385463715, 'timestamp': '2025-09-30 22:47:36.505944', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:36.565674', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.06058797612786293, 'timestamp': '2025-09-30 22:47:36.574674', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:36.634399', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.04150814563035965, 'timestamp': '2025-09-30 22:47:36.637391', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:36.706354', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.05167021229863167, 'timestamp': '2025-09-30 22:47:36.712101', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:36.771205', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.09069488197565079, 'timestamp': '2025-09-30 22:47:36.778158', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:36.860734', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.07648361474275589, 'timestamp': '2025-09-30 22:47:36.877967', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:36.945373', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.09046166390180588, 'timestamp': '2025-09-30 22:47:36.950511', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:37.010007', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.056050077080726624, 'timestamp': '2025-09-30 22:47:37.015032', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:37.073379', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.1675344854593277, 'timestamp': '2025-09-30 22:47:37.077311', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:37.136130', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.1130085289478302, 'timestamp': '2025-09-30 22:47:37.144613', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:37.213784', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.04561943560838699, 'timestamp': '2025-09-30 22:47:37.218221', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:37.278649', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.06278479844331741, 'timestamp': '2025-09-30 22:47:37.281772', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:37.341658', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.08191507309675217, 'timestamp': '2025-09-30 22:47:37.348974', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:37.426634', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.07553873211145401, 'timestamp': '2025-09-30 22:47:37.435843', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:37.505173', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.1094885841012001, 'timestamp': '2025-09-30 22:47:37.509930', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:37.573581', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.08544274419546127, 'timestamp': '2025-09-30 22:47:37.587907', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:37.648644', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.08299562335014343, 'timestamp': '2025-09-30 22:47:37.656390', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:37.725866', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.09093523770570755, 'timestamp': '2025-09-30 22:47:37.734117', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:37.802872', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.10745403170585632, 'timestamp': '2025-09-30 22:47:37.808812', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:37.870259', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.11623145639896393, 'timestamp': '2025-09-30 22:47:37.874983', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:37.950150', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.1680486798286438, 'timestamp': '2025-09-30 22:47:37.953472', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:38.012703', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.07199586182832718, 'timestamp': '2025-09-30 22:47:38.022830', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:38.083555', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.0896688923239708, 'timestamp': '2025-09-30 22:47:38.088989', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:38.162779', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.08446624875068665, 'timestamp': '2025-09-30 22:47:38.171553', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:38.239125', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.06355991214513779, 'timestamp': '2025-09-30 22:47:38.248957', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:38.315683', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.044073283672332764, 'timestamp': '2025-09-30 22:47:38.322860', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:38.382599', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.047449272125959396, 'timestamp': '2025-09-30 22:47:38.396164', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:38.465302', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.16210438311100006, 'timestamp': '2025-09-30 22:47:38.472687', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:38.534437', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.08755829930305481, 'timestamp': '2025-09-30 22:47:38.539005', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:38.614654', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.12876951694488525, 'timestamp': '2025-09-30 22:47:38.636715', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:38.708955', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.06812037527561188, 'timestamp': '2025-09-30 22:47:38.713142', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:38.770954', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.06014791131019592, 'timestamp': '2025-09-30 22:47:38.776317', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:38.839552', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.05548369511961937, 'timestamp': '2025-09-30 22:47:38.843865', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:47:38.908323', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.06671275198459625, 'timestamp': '2025-09-30 22:47:38.926373', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:38.984640', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.04431239515542984, 'timestamp': '2025-09-30 22:47:38.997412', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:39.068231', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.08408381044864655, 'timestamp': '2025-09-30 22:47:39.081870', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:39.159621', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.07896538078784943, 'timestamp': '2025-09-30 22:47:39.174162', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:39.250761', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.07720648497343063, 'timestamp': '2025-09-30 22:47:39.259353', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:39.318285', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.1770341992378235, 'timestamp': '2025-09-30 22:47:39.322786', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:39.392710', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.07863204926252365, 'timestamp': '2025-09-30 22:47:39.398054', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:39.460598', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.08949482440948486, 'timestamp': '2025-09-30 22:47:39.463912', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:39.525372', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.08409680426120758, 'timestamp': '2025-09-30 22:47:39.533048', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:39.603863', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.08013949543237686, 'timestamp': '2025-09-30 22:47:39.608557', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:39.679897', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.09593294560909271, 'timestamp': '2025-09-30 22:47:39.687769', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:39.748291', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.08216653764247894, 'timestamp': '2025-09-30 22:47:39.751638', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:39.825217', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.08796565979719162, 'timestamp': '2025-09-30 22:47:39.832635', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:39.897551', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.04233013838529587, 'timestamp': '2025-09-30 22:47:39.903056', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:39.975843', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.168031707406044, 'timestamp': '2025-09-30 22:47:39.981124', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:40.057855', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.1583210825920105, 'timestamp': '2025-09-30 22:47:40.072575', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:40.145369', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.030501969158649445, 'timestamp': '2025-09-30 22:47:40.153232', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:40.217084', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.10502847284078598, 'timestamp': '2025-09-30 22:47:40.222116', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:40.282767', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.12833315134048462, 'timestamp': '2025-09-30 22:47:40.288063', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:47:40.346992', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.07927461713552475, 'timestamp': '2025-09-30 22:47:40.353068', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:40.414172', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.06005587428808212, 'timestamp': '2025-09-30 22:47:40.432872', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:40.503931', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.03800420090556145, 'timestamp': '2025-09-30 22:47:40.507452', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:40.569190', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.1591065227985382, 'timestamp': '2025-09-30 22:47:40.572591', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:40.632265', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.07584580779075623, 'timestamp': '2025-09-30 22:47:40.638198', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:40.698837', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.07735143601894379, 'timestamp': '2025-09-30 22:47:40.707769', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:40.771509', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.04746202006936073, 'timestamp': '2025-09-30 22:47:40.787237', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:40.855119', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.06986972689628601, 'timestamp': '2025-09-30 22:47:40.859320', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:40.928921', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.11874525249004364, 'timestamp': '2025-09-30 22:47:40.934874', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:40.998313', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.07763531059026718, 'timestamp': '2025-09-30 22:47:41.018958', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:41.081804', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.056314148008823395, 'timestamp': '2025-09-30 22:47:41.088113', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:41.162333', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.02599925361573696, 'timestamp': '2025-09-30 22:47:41.177588', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:41.249070', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.09344109892845154, 'timestamp': '2025-09-30 22:47:41.252760', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:41.315641', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.04581332951784134, 'timestamp': '2025-09-30 22:47:41.325888', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:41.386224', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.029069000855088234, 'timestamp': '2025-09-30 22:47:41.390998', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:41.453322', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.10745382308959961, 'timestamp': '2025-09-30 22:47:41.457275', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:41.522579', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.0689171776175499, 'timestamp': '2025-09-30 22:47:41.528243', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:41.596581', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.07440256327390671, 'timestamp': '2025-09-30 22:47:41.617827', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:41.685772', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.1014937236905098, 'timestamp': '2025-09-30 22:47:41.690858', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:47:41.750597', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.05479130893945694, 'timestamp': '2025-09-30 22:47:41.764217', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:41.827337', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.07743976265192032, 'timestamp': '2025-09-30 22:47:41.832774', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:41.894249', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.08099035173654556, 'timestamp': '2025-09-30 22:47:41.903393', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:41.964393', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.12660954892635345, 'timestamp': '2025-09-30 22:47:41.968237', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:42.045756', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.10188327729701996, 'timestamp': '2025-09-30 22:47:42.051789', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:42.112159', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.051697198301553726, 'timestamp': '2025-09-30 22:47:42.117002', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:42.175055', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.018995335325598717, 'timestamp': '2025-09-30 22:47:42.184475', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:42.272590', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.013474552892148495, 'timestamp': '2025-09-30 22:47:42.278422', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:42.356641', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.061143383383750916, 'timestamp': '2025-09-30 22:47:42.361164', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:42.435901', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.06417766213417053, 'timestamp': '2025-09-30 22:47:42.440499', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:42.501742', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.08732958137989044, 'timestamp': '2025-09-30 22:47:42.509619', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:42.579235', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.04181956499814987, 'timestamp': '2025-09-30 22:47:42.593582', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:42.667992', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.058287858963012695, 'timestamp': '2025-09-30 22:47:42.671892', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:47:42.731910', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.07294303178787231, 'timestamp': '2025-09-30 22:47:42.738159', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:42.798150', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.07808699458837509, 'timestamp': '2025-09-30 22:47:42.805558', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:42.864111', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.13185907900333405, 'timestamp': '2025-09-30 22:47:42.872772', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:42.936255', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.068556047976017, 'timestamp': '2025-09-30 22:47:42.939618', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.000089', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.08405005931854248, 'timestamp': '2025-09-30 22:47:43.014155', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:43.074824', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.08872125297784805, 'timestamp': '2025-09-30 22:47:43.084567', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.161319', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.03880922123789787, 'timestamp': '2025-09-30 22:47:43.167231', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.232875', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.10124441981315613, 'timestamp': '2025-09-30 22:47:43.237995', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.299778', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.04784674569964409, 'timestamp': '2025-09-30 22:47:43.304543', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:43.379422', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.0315445140004158, 'timestamp': '2025-09-30 22:47:43.397696', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.457857', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.10133562237024307, 'timestamp': '2025-09-30 22:47:43.461044', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:47:43.541002', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.15372048318386078, 'timestamp': '2025-09-30 22:47:43.554160', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:43.622646', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.06642148643732071, 'timestamp': '2025-09-30 22:47:43.626979', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:43.704862', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.031113725155591965, 'timestamp': '2025-09-30 22:47:43.724591', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:43.797252', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.10786983370780945, 'timestamp': '2025-09-30 22:47:43.802345', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:43.883387', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.05230396240949631, 'timestamp': '2025-09-30 22:47:43.893217', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:43.957348', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.17270661890506744, 'timestamp': '2025-09-30 22:47:43.962261', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.055841', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.036264244467020035, 'timestamp': '2025-09-30 22:47:44.065436', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:44.138119', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.17642684280872345, 'timestamp': '2025-09-30 22:47:44.144127', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.220347', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.09101146459579468, 'timestamp': '2025-09-30 22:47:44.226982', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:44.289502', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.14249123632907867, 'timestamp': '2025-09-30 22:47:44.295894', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:44.370172', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.07697642594575882, 'timestamp': '2025-09-30 22:47:44.380657', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.447400', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.10226428508758545, 'timestamp': '2025-09-30 22:47:44.452447', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:44.521409', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.1298801153898239, 'timestamp': '2025-09-30 22:47:44.525449', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.604388', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.06839355826377869, 'timestamp': '2025-09-30 22:47:44.608648', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.684218', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.02993154525756836, 'timestamp': '2025-09-30 22:47:44.693228', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:44.755884', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.10370808094739914, 'timestamp': '2025-09-30 22:47:44.758917', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:44.839341', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.0322558619081974, 'timestamp': '2025-09-30 22:47:44.843914', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:44.908185', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.04179952293634415, 'timestamp': '2025-09-30 22:47:44.920184', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:44.993081', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.10309702903032303, 'timestamp': '2025-09-30 22:47:45.002619', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:45.067549', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.013677321374416351, 'timestamp': '2025-09-30 22:47:45.073195', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.138108', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.08554279804229736, 'timestamp': '2025-09-30 22:47:45.142741', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:45.204158', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.10171573609113693, 'timestamp': '2025-09-30 22:47:45.218909', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.291717', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.08859715610742569, 'timestamp': '2025-09-30 22:47:45.300460', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:47:45.365189', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.1714320331811905, 'timestamp': '2025-09-30 22:47:45.371136', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:45.431230', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.09067121148109436, 'timestamp': '2025-09-30 22:47:45.450026', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:47:45.520949', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.1280115246772766, 'timestamp': '2025-09-30 22:47:45.527500', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.606397', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.06490444391965866, 'timestamp': '2025-09-30 22:47:45.613883', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.685656', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.09260455518960953, 'timestamp': '2025-09-30 22:47:45.689216', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.757380', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.21019020676612854, 'timestamp': '2025-09-30 22:47:45.762475', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:47:45.823170', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.0776602253317833, 'timestamp': '2025-09-30 22:47:45.837683', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:45.897578', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.07481047511100769, 'timestamp': '2025-09-30 22:47:45.916179', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:45.997495', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.08044181019067764, 'timestamp': '2025-09-30 22:47:46.002010', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:47:46.076200', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.02541356161236763, 'timestamp': '2025-09-30 22:47:46.081250', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:48:01.404981', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 9666.909377337372, 'timestamp': '2025-09-30 22:48:01.411195', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:01.477459', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.13814286887645721, 'timestamp': '2025-09-30 22:48:01.483085', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:01.543905', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.05495166406035423, 'timestamp': '2025-09-30 22:48:01.555267', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:01.617763', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.052253298461437225, 'timestamp': '2025-09-30 22:48:01.622400', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:01.683297', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.12115529179573059, 'timestamp': '2025-09-30 22:48:01.686884', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:01.761238', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.10847587883472443, 'timestamp': '2025-09-30 22:48:01.764740', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:01.840958', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.07071756571531296, 'timestamp': '2025-09-30 22:48:01.849657', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:01.915824', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.03845280781388283, 'timestamp': '2025-09-30 22:48:01.920267', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:01.980937', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.1254778653383255, 'timestamp': '2025-09-30 22:48:01.984996', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:02.055671', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.12220382690429688, 'timestamp': '2025-09-30 22:48:02.058969', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:02.118760', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.07092947512865067, 'timestamp': '2025-09-30 22:48:02.127239', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:02.200842', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.1161414384841919, 'timestamp': '2025-09-30 22:48:02.205207', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:02.267977', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.07637777924537659, 'timestamp': '2025-09-30 22:48:02.272004', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:02.333114', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.1796301305294037, 'timestamp': '2025-09-30 22:48:02.343238', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:02.410647', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.0771109089255333, 'timestamp': '2025-09-30 22:48:02.421783', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:02.481442', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.05539728328585625, 'timestamp': '2025-09-30 22:48:02.494746', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:02.553880', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.12883877754211426, 'timestamp': '2025-09-30 22:48:02.557510', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:02.617271', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.06283779442310333, 'timestamp': '2025-09-30 22:48:02.631755', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:02.707279', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.12532418966293335, 'timestamp': '2025-09-30 22:48:02.714755', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:02.775244', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.018937967717647552, 'timestamp': '2025-09-30 22:48:02.780281', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:02.848351', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.08018959313631058, 'timestamp': '2025-09-30 22:48:02.852188', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:02.913102', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.13654078543186188, 'timestamp': '2025-09-30 22:48:02.916859', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:02.993265', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.09578241407871246, 'timestamp': '2025-09-30 22:48:03.006059', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.071699', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.10129549354314804, 'timestamp': '2025-09-30 22:48:03.076490', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.136143', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.0667567104101181, 'timestamp': '2025-09-30 22:48:03.140569', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.213215', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.08335763961076736, 'timestamp': '2025-09-30 22:48:03.218199', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:03.279511', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.03965368494391441, 'timestamp': '2025-09-30 22:48:03.287717', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:03.347624', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.1345321238040924, 'timestamp': '2025-09-30 22:48:03.356630', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:03.415229', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.06916765868663788, 'timestamp': '2025-09-30 22:48:03.420306', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.481426', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.0633169561624527, 'timestamp': '2025-09-30 22:48:03.487279', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:03.555125', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.08774038404226303, 'timestamp': '2025-09-30 22:48:03.562075', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:03.636346', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.12572306394577026, 'timestamp': '2025-09-30 22:48:03.647422', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.706734', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.09360965341329575, 'timestamp': '2025-09-30 22:48:03.710636', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:03.790463', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.07113205641508102, 'timestamp': '2025-09-30 22:48:03.793873', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:03.856049', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.13102218508720398, 'timestamp': '2025-09-30 22:48:03.863280', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:03.921814', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.13104881346225739, 'timestamp': '2025-09-30 22:48:03.925375', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:03.997533', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.13788467645645142, 'timestamp': '2025-09-30 22:48:04.001277', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:04.062471', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.0755346342921257, 'timestamp': '2025-09-30 22:48:04.066634', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:04.129510', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.046047143638134, 'timestamp': '2025-09-30 22:48:04.138656', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:04.197177', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.13325102627277374, 'timestamp': '2025-09-30 22:48:04.201102', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:04.285659', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.06798775494098663, 'timestamp': '2025-09-30 22:48:04.289268', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:04.346149', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.0669272243976593, 'timestamp': '2025-09-30 22:48:04.358630', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:04.425541', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.01984453573822975, 'timestamp': '2025-09-30 22:48:04.433707', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:04.492447', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.10353921353816986, 'timestamp': '2025-09-30 22:48:04.498776', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:04.556782', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.04951881617307663, 'timestamp': '2025-09-30 22:48:04.560447', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:04.621123', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.09304963052272797, 'timestamp': '2025-09-30 22:48:04.625852', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:04.696287', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.056219927966594696, 'timestamp': '2025-09-30 22:48:04.703656', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:04.761616', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.06371649354696274, 'timestamp': '2025-09-30 22:48:04.767082', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:04.826124', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.09571579843759537, 'timestamp': '2025-09-30 22:48:04.830666', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:04.890006', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.06022913008928299, 'timestamp': '2025-09-30 22:48:04.893767', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:04.954133', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.05436651036143303, 'timestamp': '2025-09-30 22:48:04.962368', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:05.032381', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.08159749954938889, 'timestamp': '2025-09-30 22:48:05.036667', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.107251', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.09805094450712204, 'timestamp': '2025-09-30 22:48:05.112180', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:05.172264', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.06919515132904053, 'timestamp': '2025-09-30 22:48:05.176738', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:05.234679', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.07090654224157333, 'timestamp': '2025-09-30 22:48:05.248377', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:05.314623', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.07965352386236191, 'timestamp': '2025-09-30 22:48:05.327715', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.400531', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.0821535736322403, 'timestamp': '2025-09-30 22:48:05.404734', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:05.477962', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.09877125173807144, 'timestamp': '2025-09-30 22:48:05.487126', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.552246', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.07149918377399445, 'timestamp': '2025-09-30 22:48:05.568436', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.638242', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.06863472610712051, 'timestamp': '2025-09-30 22:48:05.641497', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.712801', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.12266068905591965, 'timestamp': '2025-09-30 22:48:05.716109', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.784574', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.06424178928136826, 'timestamp': '2025-09-30 22:48:05.788210', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:05.854890', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.07408081740140915, 'timestamp': '2025-09-30 22:48:05.861719', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:05.919305', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.05620097368955612, 'timestamp': '2025-09-30 22:48:05.924041', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:05.981852', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.06835447251796722, 'timestamp': '2025-09-30 22:48:05.986642', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.048176', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.12108933925628662, 'timestamp': '2025-09-30 22:48:06.052315', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.136637', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.1000542864203453, 'timestamp': '2025-09-30 22:48:06.153571', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.211661', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.07894454151391983, 'timestamp': '2025-09-30 22:48:06.217162', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.274966', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.058145441114902496, 'timestamp': '2025-09-30 22:48:06.279738', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.343878', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.08150190860033035, 'timestamp': '2025-09-30 22:48:06.356409', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:06.423570', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.149169459939003, 'timestamp': '2025-09-30 22:48:06.443268', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.524788', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.16629287600517273, 'timestamp': '2025-09-30 22:48:06.536689', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:06.604525', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.11515075713396072, 'timestamp': '2025-09-30 22:48:06.608176', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:06.668916', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.13144321739673615, 'timestamp': '2025-09-30 22:48:06.681205', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:06.760175', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.029335085302591324, 'timestamp': '2025-09-30 22:48:06.767871', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:06.828865', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.1626768708229065, 'timestamp': '2025-09-30 22:48:06.832447', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:06.891166', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.12697163224220276, 'timestamp': '2025-09-30 22:48:06.894765', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:06.975210', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.17891302704811096, 'timestamp': '2025-09-30 22:48:06.990259', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:07.049172', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.22505073249340057, 'timestamp': '2025-09-30 22:48:07.057849', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:07.117460', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.08362363278865814, 'timestamp': '2025-09-30 22:48:07.121028', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:07.180545', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.16038377583026886, 'timestamp': '2025-09-30 22:48:07.184925', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:07.254896', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.16511228680610657, 'timestamp': '2025-09-30 22:48:07.258547', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:07.317521', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.09923358261585236, 'timestamp': '2025-09-30 22:48:07.325552', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:07.385893', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.0578790158033371, 'timestamp': '2025-09-30 22:48:07.393497', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:07.452629', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.19126610457897186, 'timestamp': '2025-09-30 22:48:07.456809', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:07.515573', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.14145106077194214, 'timestamp': '2025-09-30 22:48:07.525166', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:07.586098', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.024800289422273636, 'timestamp': '2025-09-30 22:48:07.603663', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:07.663080', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.08405622839927673, 'timestamp': '2025-09-30 22:48:07.673286', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:07.738499', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.09320610761642456, 'timestamp': '2025-09-30 22:48:07.744817', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:07.804589', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.11129347234964371, 'timestamp': '2025-09-30 22:48:07.808333', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:07.868565', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.08841568231582642, 'timestamp': '2025-09-30 22:48:07.877786', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:07.937829', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.044876303523778915, 'timestamp': '2025-09-30 22:48:07.942249', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:08.008338', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.09671783447265625, 'timestamp': '2025-09-30 22:48:08.013184', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:08.073903', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.052317701280117035, 'timestamp': '2025-09-30 22:48:08.078029', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:08.139065', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.07966595143079758, 'timestamp': '2025-09-30 22:48:08.151756', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:08.213502', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.059136152267456055, 'timestamp': '2025-09-30 22:48:08.216934', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:08.286475', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.1320149451494217, 'timestamp': '2025-09-30 22:48:08.290237', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:08.350623', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.06526431441307068, 'timestamp': '2025-09-30 22:48:08.355153', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:08.426365', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.058608151972293854, 'timestamp': '2025-09-30 22:48:08.435108', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-09-30 22:48:08.872053', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:08.949985', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.11211080849170685, 'timestamp': '2025-09-30 22:48:08.954054', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.018235', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.07540389150381088, 'timestamp': '2025-09-30 22:48:09.021946', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:09.097868', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.07896368950605392, 'timestamp': '2025-09-30 22:48:09.103053', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.163963', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.08370387554168701, 'timestamp': '2025-09-30 22:48:09.171828', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:09.236995', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.13366349041461945, 'timestamp': '2025-09-30 22:48:09.246945', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.304847', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.0947088897228241, 'timestamp': '2025-09-30 22:48:09.307493', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.365326', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.14352959394454956, 'timestamp': '2025-09-30 22:48:09.367926', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.426109', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.09047973901033401, 'timestamp': '2025-09-30 22:48:09.434558', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.493307', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.05604710429906845, 'timestamp': '2025-09-30 22:48:09.497657', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:09.571607', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.010234998539090157, 'timestamp': '2025-09-30 22:48:09.574306', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.633059', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.08839748799800873, 'timestamp': '2025-09-30 22:48:09.637250', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:09.711385', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.03126710280776024, 'timestamp': '2025-09-30 22:48:09.718801', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:09.778111', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.07428423315286636, 'timestamp': '2025-09-30 22:48:09.782130', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:09.842206', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.04163576290011406, 'timestamp': '2025-09-30 22:48:09.846335', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:09.907931', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.15615516901016235, 'timestamp': '2025-09-30 22:48:09.911309', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:09.971685', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.11454445868730545, 'timestamp': '2025-09-30 22:48:09.981296', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:10.045301', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.13111186027526855, 'timestamp': '2025-09-30 22:48:10.059222', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:10.122764', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.06813722103834152, 'timestamp': '2025-09-30 22:48:10.128417', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:10.216643', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.058186620473861694, 'timestamp': '2025-09-30 22:48:10.221635', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:10.281645', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.08312198519706726, 'timestamp': '2025-09-30 22:48:10.290047', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:10.349230', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.14701154828071594, 'timestamp': '2025-09-30 22:48:10.353528', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:10.414899', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.15200723707675934, 'timestamp': '2025-09-30 22:48:10.418843', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:10.487201', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.0944659411907196, 'timestamp': '2025-09-30 22:48:10.496862', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:10.563218', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.16638009250164032, 'timestamp': '2025-09-30 22:48:10.572873', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:10.630550', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.07547815889120102, 'timestamp': '2025-09-30 22:48:10.641426', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:10.706957', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.11325079202651978, 'timestamp': '2025-09-30 22:48:10.710411', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:10.776364', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.0379863865673542, 'timestamp': '2025-09-30 22:48:10.780254', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:10.847443', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.046873267740011215, 'timestamp': '2025-09-30 22:48:10.866690', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:10.923898', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.039796289056539536, 'timestamp': '2025-09-30 22:48:10.928328', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:10.991256', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.06944368779659271, 'timestamp': '2025-09-30 22:48:10.996866', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:11.059766', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.0909850224852562, 'timestamp': '2025-09-30 22:48:11.063540', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:11.125319', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.1213151067495346, 'timestamp': '2025-09-30 22:48:11.131682', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:11.196094', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.07631252706050873, 'timestamp': '2025-09-30 22:48:11.199922', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:11.257072', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.043145425617694855, 'timestamp': '2025-09-30 22:48:11.260039', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:11.322662', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.11312130093574524, 'timestamp': '2025-09-30 22:48:11.325968', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:11.383474', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.056768808513879776, 'timestamp': '2025-09-30 22:48:11.395302', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:11.452061', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.09016801416873932, 'timestamp': '2025-09-30 22:48:11.457162', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:11.516219', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.15383358299732208, 'timestamp': '2025-09-30 22:48:11.519321', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:11.585766', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.03695935755968094, 'timestamp': '2025-09-30 22:48:11.588436', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:11.650971', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.08883843570947647, 'timestamp': '2025-09-30 22:48:11.657320', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:11.719126', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.11454673856496811, 'timestamp': '2025-09-30 22:48:11.722417', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:11.780556', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.0406232587993145, 'timestamp': '2025-09-30 22:48:11.784088', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:11.848126', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.07610245048999786, 'timestamp': '2025-09-30 22:48:11.851244', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:11.914655', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.07898449897766113, 'timestamp': '2025-09-30 22:48:11.926246', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:12.016794', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.0417025201022625, 'timestamp': '2025-09-30 22:48:12.020680', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:12.081574', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.05452451854944229, 'timestamp': '2025-09-30 22:48:12.085563', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:12.145068', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.05308384448289871, 'timestamp': '2025-09-30 22:48:12.152631', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:12.224966', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.13812395930290222, 'timestamp': '2025-09-30 22:48:12.231655', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:12.290864', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.04917062073945999, 'timestamp': '2025-09-30 22:48:12.294005', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:12.352956', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.11733487248420715, 'timestamp': '2025-09-30 22:48:12.355642', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:12.416526', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.046307217329740524, 'timestamp': '2025-09-30 22:48:12.419908', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:12.497961', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.06471671164035797, 'timestamp': '2025-09-30 22:48:12.510781', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:12.569068', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.0791960060596466, 'timestamp': '2025-09-30 22:48:12.571915', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:12.639793', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.03501996770501137, 'timestamp': '2025-09-30 22:48:12.643539', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:12.701090', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.07320396602153778, 'timestamp': '2025-09-30 22:48:12.708732', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:12.784050', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.08659769594669342, 'timestamp': '2025-09-30 22:48:12.791789', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:12.854066', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.052772119641304016, 'timestamp': '2025-09-30 22:48:12.860637', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:12.918617', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.031157350167632103, 'timestamp': '2025-09-30 22:48:12.927185', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:12.993774', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.03875812888145447, 'timestamp': '2025-09-30 22:48:13.001113', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.062680', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.037046827375888824, 'timestamp': '2025-09-30 22:48:13.076189', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:13.140260', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.06164579838514328, 'timestamp': '2025-09-30 22:48:13.142738', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.204407', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.036318931728601456, 'timestamp': '2025-09-30 22:48:13.211270', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:13.274212', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.0793704017996788, 'timestamp': '2025-09-30 22:48:13.277288', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:13.340415', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.1278257817029953, 'timestamp': '2025-09-30 22:48:13.352738', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:13.414931', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.017801132053136826, 'timestamp': '2025-09-30 22:48:13.417873', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:13.492955', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.10942193120718002, 'timestamp': '2025-09-30 22:48:13.505960', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:13.562666', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.05080744996666908, 'timestamp': '2025-09-30 22:48:13.573560', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.630622', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.07701435685157776, 'timestamp': '2025-09-30 22:48:13.638774', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:13.706826', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.15305888652801514, 'timestamp': '2025-09-30 22:48:13.709112', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.766810', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.0762028619647026, 'timestamp': '2025-09-30 22:48:13.769629', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.828264', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.04961688444018364, 'timestamp': '2025-09-30 22:48:13.830535', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:13.890408', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.09013966470956802, 'timestamp': '2025-09-30 22:48:13.895952', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:13.951396', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.05399515852332115, 'timestamp': '2025-09-30 22:48:13.956687', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:14.012874', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.06782988458871841, 'timestamp': '2025-09-30 22:48:14.026138', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:14.106836', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.05022382363677025, 'timestamp': '2025-09-30 22:48:14.109136', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:14.166762', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.06020887568593025, 'timestamp': '2025-09-30 22:48:14.173124', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:14.231042', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.05539558455348015, 'timestamp': '2025-09-30 22:48:14.233685', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:48:14.293710', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.1085231602191925, 'timestamp': '2025-09-30 22:48:14.296040', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:14.352406', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.1624908745288849, 'timestamp': '2025-09-30 22:48:14.354925', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:14.423027', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.033642444759607315, 'timestamp': '2025-09-30 22:48:14.428588', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:14.486297', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.17864421010017395, 'timestamp': '2025-09-30 22:48:14.490515', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:14.551209', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.10032694786787033, 'timestamp': '2025-09-30 22:48:14.553907', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:14.610650', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.11981332302093506, 'timestamp': '2025-09-30 22:48:14.612822', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:14.670107', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.08276313543319702, 'timestamp': '2025-09-30 22:48:14.676669', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:14.733582', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.0852961614727974, 'timestamp': '2025-09-30 22:48:14.736348', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:14.807852', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.06999968737363815, 'timestamp': '2025-09-30 22:48:14.810059', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:14.869273', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.1608196645975113, 'timestamp': '2025-09-30 22:48:14.871569', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:14.932300', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.053473375737667084, 'timestamp': '2025-09-30 22:48:14.939065', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:14.996808', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.11663173884153366, 'timestamp': '2025-09-30 22:48:14.999305', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:15.071820', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.06973610818386078, 'timestamp': '2025-09-30 22:48:15.074160', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:15.138923', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.06613928824663162, 'timestamp': '2025-09-30 22:48:15.141326', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:15.202702', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.10649306327104568, 'timestamp': '2025-09-30 22:48:15.208592', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.268710', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.07729480415582657, 'timestamp': '2025-09-30 22:48:15.272742', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:15.329313', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.06838527321815491, 'timestamp': '2025-09-30 22:48:15.331485', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.401661', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.05849168822169304, 'timestamp': '2025-09-30 22:48:15.404391', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.462964', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.0930929109454155, 'timestamp': '2025-09-30 22:48:15.469934', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:15.529691', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.13226613402366638, 'timestamp': '2025-09-30 22:48:15.532736', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:15.603768', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.07789389789104462, 'timestamp': '2025-09-30 22:48:15.606385', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.666407', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.0482836477458477, 'timestamp': '2025-09-30 22:48:15.669417', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:15.730632', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.0925452932715416, 'timestamp': '2025-09-30 22:48:15.736677', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.797420', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.13987335562705994, 'timestamp': '2025-09-30 22:48:15.800757', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:15.879956', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.08048196136951447, 'timestamp': '2025-09-30 22:48:15.884766', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:15.943458', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.05775323510169983, 'timestamp': '2025-09-30 22:48:15.955262', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:16.022167', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.1288692206144333, 'timestamp': '2025-09-30 22:48:16.030190', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.095801', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.1286972016096115, 'timestamp': '2025-09-30 22:48:16.103070', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.166056', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.06340697407722473, 'timestamp': '2025-09-30 22:48:16.172059', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.230384', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.10103969275951385, 'timestamp': '2025-09-30 22:48:16.233173', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.291093', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.08786512911319733, 'timestamp': '2025-09-30 22:48:16.297624', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:16.359440', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.09396591782569885, 'timestamp': '2025-09-30 22:48:16.361819', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:16.420185', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.06057174876332283, 'timestamp': '2025-09-30 22:48:16.424587', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.495565', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.12052303552627563, 'timestamp': '2025-09-30 22:48:16.499442', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.558537', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.10746404528617859, 'timestamp': '2025-09-30 22:48:16.570739', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:16.638925', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.11351912468671799, 'timestamp': '2025-09-30 22:48:16.648463', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.708181', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.10338377207517624, 'timestamp': '2025-09-30 22:48:16.711016', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:16.785805', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.11980811506509781, 'timestamp': '2025-09-30 22:48:16.799464', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:16.870142', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.12331820279359818, 'timestamp': '2025-09-30 22:48:16.876440', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:16.933985', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.1024114340543747, 'timestamp': '2025-09-30 22:48:16.938437', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:16.996006', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.023436419665813446, 'timestamp': '2025-09-30 22:48:16.999361', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:17.062021', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.06018095090985298, 'timestamp': '2025-09-30 22:48:17.065573', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:17.123317', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.07558538019657135, 'timestamp': '2025-09-30 22:48:17.130232', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:17.188325', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.052496109157800674, 'timestamp': '2025-09-30 22:48:17.195433', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:17.260183', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.0734039694070816, 'timestamp': '2025-09-30 22:48:17.262812', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:17.322541', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.2105480283498764, 'timestamp': '2025-09-30 22:48:17.329099', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:17.387645', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.08502456545829773, 'timestamp': '2025-09-30 22:48:17.397859', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:17.459842', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.08357648551464081, 'timestamp': '2025-09-30 22:48:17.464665', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:17.538375', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.04863705486059189, 'timestamp': '2025-09-30 22:48:17.542901', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:17.602508', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.172057643532753, 'timestamp': '2025-09-30 22:48:17.607225', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:17.667050', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.08072095364332199, 'timestamp': '2025-09-30 22:48:17.675761', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:17.757366', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.05432235449552536, 'timestamp': '2025-09-30 22:48:17.764672', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:17.826203', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.08100928366184235, 'timestamp': '2025-09-30 22:48:17.829323', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:17.888082', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.026749256998300552, 'timestamp': '2025-09-30 22:48:17.892867', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:17.951288', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.03720995411276817, 'timestamp': '2025-09-30 22:48:17.958339', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:18.016821', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.0756089836359024, 'timestamp': '2025-09-30 22:48:18.019569', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:18.078962', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.10824558138847351, 'timestamp': '2025-09-30 22:48:18.082433', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:18.141042', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.04724276810884476, 'timestamp': '2025-09-30 22:48:18.144318', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:18.201782', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.05835803225636482, 'timestamp': '2025-09-30 22:48:18.209565', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:18.290903', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.11376139521598816, 'timestamp': '2025-09-30 22:48:18.295070', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:18.355957', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.02650262974202633, 'timestamp': '2025-09-30 22:48:18.359005', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:18.418625', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.05534736439585686, 'timestamp': '2025-09-30 22:48:18.422026', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:18.481648', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.10152626037597656, 'timestamp': '2025-09-30 22:48:18.490061', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:18.548707', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.07752159982919693, 'timestamp': '2025-09-30 22:48:18.552288', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:18.611553', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.053817495703697205, 'timestamp': '2025-09-30 22:48:18.615574', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:18.674074', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.09662147611379623, 'timestamp': '2025-09-30 22:48:18.677524', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:18.736723', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.05684032663702965, 'timestamp': '2025-09-30 22:48:18.743675', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:18.803410', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.11579505354166031, 'timestamp': '2025-09-30 22:48:18.814265', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:18.881834', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.10330825299024582, 'timestamp': '2025-09-30 22:48:18.886413', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:18.944743', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.06260401755571365, 'timestamp': '2025-09-30 22:48:18.947990', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.021159', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.04882746562361717, 'timestamp': '2025-09-30 22:48:19.027553', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:19.084693', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.0528840646147728, 'timestamp': '2025-09-30 22:48:19.088083', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:19.146974', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.0792335569858551, 'timestamp': '2025-09-30 22:48:19.149956', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.220327', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.1260552704334259, 'timestamp': '2025-09-30 22:48:19.227634', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.290198', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.0658627450466156, 'timestamp': '2025-09-30 22:48:19.296604', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.408881', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.026018813252449036, 'timestamp': '2025-09-30 22:48:19.412358', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.536152', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.026427146047353745, 'timestamp': '2025-09-30 22:48:19.549738', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.659175', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.012005513533949852, 'timestamp': '2025-09-30 22:48:19.661694', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:19.720586', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.023713063448667526, 'timestamp': '2025-09-30 22:48:19.734115', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.803746', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.05769004300236702, 'timestamp': '2025-09-30 22:48:19.811347', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:19.907082', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.01787874475121498, 'timestamp': '2025-09-30 22:48:19.912637', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:19.983648', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.11818176507949829, 'timestamp': '2025-09-30 22:48:19.993098', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:20.062244', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.11682451516389847, 'timestamp': '2025-09-30 22:48:20.086851', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:20.151627', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.05719863995909691, 'timestamp': '2025-09-30 22:48:20.162202', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:20.219197', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.03964465856552124, 'timestamp': '2025-09-30 22:48:20.233485', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:20.300963', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.15321050584316254, 'timestamp': '2025-09-30 22:48:20.317922', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:20.402477', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.04800320789217949, 'timestamp': '2025-09-30 22:48:20.416642', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:20.480079', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.07869832217693329, 'timestamp': '2025-09-30 22:48:20.496045', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:20.571773', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.08107556402683258, 'timestamp': '2025-09-30 22:48:20.599423', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:20.693674', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.035094987601041794, 'timestamp': '2025-09-30 22:48:20.705617', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:20.776479', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.10915671288967133, 'timestamp': '2025-09-30 22:48:20.788214', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:20.857740', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.057807207107543945, 'timestamp': '2025-09-30 22:48:20.875254', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:20.945927', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.04167761281132698, 'timestamp': '2025-09-30 22:48:20.963653', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:21.026802', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.047553300857543945, 'timestamp': '2025-09-30 22:48:21.030773', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:21.098193', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.07291025668382645, 'timestamp': '2025-09-30 22:48:21.106106', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:21.168706', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.08616510033607483, 'timestamp': '2025-09-30 22:48:21.172234', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:21.231172', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.10266998410224915, 'timestamp': '2025-09-30 22:48:21.235475', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:21.296018', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.050249841064214706, 'timestamp': '2025-09-30 22:48:21.299764', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:21.374478', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.11404953896999359, 'timestamp': '2025-09-30 22:48:21.380673', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:21.451513', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.0795406624674797, 'timestamp': '2025-09-30 22:48:21.463750', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:21.527472', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.05993267893791199, 'timestamp': '2025-09-30 22:48:21.536268', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:21.602047', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.144647017121315, 'timestamp': '2025-09-30 22:48:21.606294', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:21.690237', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.139537513256073, 'timestamp': '2025-09-30 22:48:21.696289', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:21.773432', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.13780105113983154, 'timestamp': '2025-09-30 22:48:21.776262', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:21.846031', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.039294641464948654, 'timestamp': '2025-09-30 22:48:21.852840', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:21.930905', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.12836024165153503, 'timestamp': '2025-09-30 22:48:21.933469', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:21.993114', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.04086687043309212, 'timestamp': '2025-09-30 22:48:22.001556', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.080058', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.16316118836402893, 'timestamp': '2025-09-30 22:48:22.083026', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:22.157552', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.07230180501937866, 'timestamp': '2025-09-30 22:48:22.161349', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.231672', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.019366947934031487, 'timestamp': '2025-09-30 22:48:22.235183', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.300660', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.08514657616615295, 'timestamp': '2025-09-30 22:48:22.308446', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.369762', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.0876244455575943, 'timestamp': '2025-09-30 22:48:22.374543', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:22.452524', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.054632414132356644, 'timestamp': '2025-09-30 22:48:22.457569', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:22.544962', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.13001760840415955, 'timestamp': '2025-09-30 22:48:22.548051', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:22.608572', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.0960560217499733, 'timestamp': '2025-09-30 22:48:22.623681', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:22.681608', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.09398029744625092, 'timestamp': '2025-09-30 22:48:22.689268', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:22.748669', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.058197613805532455, 'timestamp': '2025-09-30 22:48:22.752051', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.810254', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.019337177276611328, 'timestamp': '2025-09-30 22:48:22.813703', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:22.870591', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.12021825462579727, 'timestamp': '2025-09-30 22:48:22.878292', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:22.935566', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.021094923838973045, 'timestamp': '2025-09-30 22:48:22.947455', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:23.011662', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.1282203197479248, 'timestamp': '2025-09-30 22:48:23.014113', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.075191', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.036336563527584076, 'timestamp': '2025-09-30 22:48:23.078752', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.136292', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.034283071756362915, 'timestamp': '2025-09-30 22:48:23.142817', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:23.216730', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.17182135581970215, 'timestamp': '2025-09-30 22:48:23.225457', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.282272', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.04069427773356438, 'timestamp': '2025-09-30 22:48:23.285979', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:23.345401', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.07998175173997879, 'timestamp': '2025-09-30 22:48:23.348960', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:23.425557', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.002558513544499874, 'timestamp': '2025-09-30 22:48:23.432736', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:23.489917', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.023294953629374504, 'timestamp': '2025-09-30 22:48:23.492946', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:23.556955', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.09582622349262238, 'timestamp': '2025-09-30 22:48:23.568352', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:23.633625', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.08837087452411652, 'timestamp': '2025-09-30 22:48:23.636671', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.695961', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.06827935576438904, 'timestamp': '2025-09-30 22:48:23.702768', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:23.775622', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.04271703213453293, 'timestamp': '2025-09-30 22:48:23.778511', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.835656', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.09579383581876755, 'timestamp': '2025-09-30 22:48:23.839023', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:23.897715', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.053974758833646774, 'timestamp': '2025-09-30 22:48:23.908367', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:23.966884', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.06341134756803513, 'timestamp': '2025-09-30 22:48:23.973472', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.030180', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.0855555459856987, 'timestamp': '2025-09-30 22:48:24.040099', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.098502', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.08401709049940109, 'timestamp': '2025-09-30 22:48:24.102936', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.161067', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.04869537428021431, 'timestamp': '2025-09-30 22:48:24.169479', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.229236', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.06226862594485283, 'timestamp': '2025-09-30 22:48:24.235943', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:24.309461', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.0990348681807518, 'timestamp': '2025-09-30 22:48:24.312668', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.370824', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.13440881669521332, 'timestamp': '2025-09-30 22:48:24.379416', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.440449', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.04572019726037979, 'timestamp': '2025-09-30 22:48:24.451757', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:24.516585', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.03171612694859505, 'timestamp': '2025-09-30 22:48:24.523352', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:24.581146', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.0707559883594513, 'timestamp': '2025-09-30 22:48:24.583953', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.642957', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.09673958271741867, 'timestamp': '2025-09-30 22:48:24.646085', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:24.720425', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.10197548568248749, 'timestamp': '2025-09-30 22:48:24.723019', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:24.783298', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.047514256089925766, 'timestamp': '2025-09-30 22:48:24.792640', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:24.851696', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.04978910833597183, 'timestamp': '2025-09-30 22:48:24.855709', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:24.913626', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.10706554353237152, 'timestamp': '2025-09-30 22:48:24.916634', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:24.975280', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.05913812294602394, 'timestamp': '2025-09-30 22:48:24.979519', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:25.038586', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.07622287422418594, 'timestamp': '2025-09-30 22:48:25.054968', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:25.117451', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.0662238672375679, 'timestamp': '2025-09-30 22:48:25.123383', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:25.185973', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.08821627497673035, 'timestamp': '2025-09-30 22:48:25.189223', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:25.249285', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.0653221607208252, 'timestamp': '2025-09-30 22:48:25.253978', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:25.317778', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.09295429289340973, 'timestamp': '2025-09-30 22:48:25.324647', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:25.394188', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.11807854473590851, 'timestamp': '2025-09-30 22:48:25.407637', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:25.467898', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.09182031452655792, 'timestamp': '2025-09-30 22:48:25.475161', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:25.535275', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.07662170380353928, 'timestamp': '2025-09-30 22:48:25.538755', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:25.614747', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.06240519881248474, 'timestamp': '2025-09-30 22:48:25.621527', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:25.685543', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.12346089631319046, 'timestamp': '2025-09-30 22:48:25.695931', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:25.761919', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.08407219499349594, 'timestamp': '2025-09-30 22:48:25.767047', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:25.825213', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.11550037562847137, 'timestamp': '2025-09-30 22:48:25.828375', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:25.899061', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.11395996063947678, 'timestamp': '2025-09-30 22:48:25.905484', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:25.964517', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.203369602560997, 'timestamp': '2025-09-30 22:48:25.968434', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.031952', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.09478136897087097, 'timestamp': '2025-09-30 22:48:26.037349', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:26.130985', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.07015136629343033, 'timestamp': '2025-09-30 22:48:26.135051', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.205069', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.05502558499574661, 'timestamp': '2025-09-30 22:48:26.211881', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.270193', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.04769926518201828, 'timestamp': '2025-09-30 22:48:26.275895', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:26.336899', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.07189912348985672, 'timestamp': '2025-09-30 22:48:26.340231', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:26.401617', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.05173037573695183, 'timestamp': '2025-09-30 22:48:26.404928', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:26.462113', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.06427626311779022, 'timestamp': '2025-09-30 22:48:26.469119', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:26.526275', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.06859415024518967, 'timestamp': '2025-09-30 22:48:26.528745', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.588690', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.15481504797935486, 'timestamp': '2025-09-30 22:48:26.591839', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.662113', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.13003847002983093, 'timestamp': '2025-09-30 22:48:26.666271', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:26.723972', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.12952663004398346, 'timestamp': '2025-09-30 22:48:26.740317', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:26.797886', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.09378448873758316, 'timestamp': '2025-09-30 22:48:26.800429', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:26.858392', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.04951603338122368, 'timestamp': '2025-09-30 22:48:26.861075', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:26.934621', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.0828208178281784, 'timestamp': '2025-09-30 22:48:26.937421', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:27.005016', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.10004103928804398, 'timestamp': '2025-09-30 22:48:27.011075', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.069961', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.11691693216562271, 'timestamp': '2025-09-30 22:48:27.072986', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.129949', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.034428197890520096, 'timestamp': '2025-09-30 22:48:27.132656', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:27.202289', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.11011090874671936, 'timestamp': '2025-09-30 22:48:27.205635', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.264667', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.07724151760339737, 'timestamp': '2025-09-30 22:48:27.271717', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:27.332030', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.1067962720990181, 'timestamp': '2025-09-30 22:48:27.336019', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.394772', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.08509612083435059, 'timestamp': '2025-09-30 22:48:27.398571', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.457792', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.04521968960762024, 'timestamp': '2025-09-30 22:48:27.461849', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:27.528529', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.1165689006447792, 'timestamp': '2025-09-30 22:48:27.536398', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:27.593313', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.08137034624814987, 'timestamp': '2025-09-30 22:48:27.597151', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:27.655233', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.0750608891248703, 'timestamp': '2025-09-30 22:48:27.659636', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:27.727628', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.06298971176147461, 'timestamp': '2025-09-30 22:48:27.731071', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:27.806795', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.08061116188764572, 'timestamp': '2025-09-30 22:48:27.813782', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:27.870971', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.044575825333595276, 'timestamp': '2025-09-30 22:48:27.874256', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:27.947953', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.07862384617328644, 'timestamp': '2025-09-30 22:48:27.951320', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:28.010553', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.06344304233789444, 'timestamp': '2025-09-30 22:48:28.023440', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.087871', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.10206776112318039, 'timestamp': '2025-09-30 22:48:28.094784', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:28.154952', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.07786715775728226, 'timestamp': '2025-09-30 22:48:28.158331', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.219654', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.07086513191461563, 'timestamp': '2025-09-30 22:48:28.227651', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.286885', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.06884265691041946, 'timestamp': '2025-09-30 22:48:28.291265', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:28.349877', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.08355796337127686, 'timestamp': '2025-09-30 22:48:28.356896', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:28.425357', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.1550706923007965, 'timestamp': '2025-09-30 22:48:28.428300', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:28.487691', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.034120772033929825, 'timestamp': '2025-09-30 22:48:28.491083', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.559474', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.11733409762382507, 'timestamp': '2025-09-30 22:48:28.563350', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:28.621712', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.07034069299697876, 'timestamp': '2025-09-30 22:48:28.628185', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:28.692257', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.082614466547966, 'timestamp': '2025-09-30 22:48:28.700350', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.759355', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.1044187843799591, 'timestamp': '2025-09-30 22:48:28.772648', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:28.832503', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.021715983748435974, 'timestamp': '2025-09-30 22:48:28.836002', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.893969', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.10537125915288925, 'timestamp': '2025-09-30 22:48:28.900809', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:28.964671', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.12443742156028748, 'timestamp': '2025-09-30 22:48:28.967042', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:29.032094', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.026614109054207802, 'timestamp': '2025-09-30 22:48:29.035412', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:29.102326', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.0501808263361454, 'timestamp': '2025-09-30 22:48:29.106286', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:29.180223', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.13575485348701477, 'timestamp': '2025-09-30 22:48:29.187666', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.246271', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.08533935248851776, 'timestamp': '2025-09-30 22:48:29.254348', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.317980', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.13951729238033295, 'timestamp': '2025-09-30 22:48:29.320610', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 3520021436096.0}, 'timestamp': '2025-09-30 22:48:29.379884', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.06889591366052628, 'timestamp': '2025-09-30 22:48:29.392824', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.451658', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.15877673029899597, 'timestamp': '2025-09-30 22:48:29.463606', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.527669', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.12207749485969543, 'timestamp': '2025-09-30 22:48:29.530738', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:29.587976', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.09124255925416946, 'timestamp': '2025-09-30 22:48:29.591476', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.679261', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.08228972554206848, 'timestamp': '2025-09-30 22:48:29.682655', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:29.742175', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.1064271405339241, 'timestamp': '2025-09-30 22:48:29.756926', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:29.815087', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.07941605895757675, 'timestamp': '2025-09-30 22:48:29.818339', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:29.876179', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.04116334393620491, 'timestamp': '2025-09-30 22:48:29.878883', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:29.936898', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.015069271437823772, 'timestamp': '2025-09-30 22:48:29.940093', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:29.996056', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.07896644622087479, 'timestamp': '2025-09-30 22:48:30.003892', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.079355', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.120658278465271, 'timestamp': '2025-09-30 22:48:30.082592', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:30.143211', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.06404461711645126, 'timestamp': '2025-09-30 22:48:30.146416', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.221549', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.08906101435422897, 'timestamp': '2025-09-30 22:48:30.225267', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.283951', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.08188320696353912, 'timestamp': '2025-09-30 22:48:30.291728', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.364940', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.12101735174655914, 'timestamp': '2025-09-30 22:48:30.367554', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.426057', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.062011003494262695, 'timestamp': '2025-09-30 22:48:30.430160', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.491386', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.028968052938580513, 'timestamp': '2025-09-30 22:48:30.494317', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.556625', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.061236120760440826, 'timestamp': '2025-09-30 22:48:30.563805', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.621566', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.10299945622682571, 'timestamp': '2025-09-30 22:48:30.624734', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.688130', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.037939414381980896, 'timestamp': '2025-09-30 22:48:30.691810', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.750415', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.09709898382425308, 'timestamp': '2025-09-30 22:48:30.754232', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:30.814327', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.09154372662305832, 'timestamp': '2025-09-30 22:48:30.820918', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:30.888946', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.10540074110031128, 'timestamp': '2025-09-30 22:48:30.892875', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:30.953135', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.10646648705005646, 'timestamp': '2025-09-30 22:48:30.956235', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:31.014404', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.054598741233348846, 'timestamp': '2025-09-30 22:48:31.017616', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.076826', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.09198285639286041, 'timestamp': '2025-09-30 22:48:31.084351', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:31.150052', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.08354724943637848, 'timestamp': '2025-09-30 22:48:31.154005', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:31.212765', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.05862661451101303, 'timestamp': '2025-09-30 22:48:31.216617', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.274870', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.042971886694431305, 'timestamp': '2025-09-30 22:48:31.277629', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.336043', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.05118628218770027, 'timestamp': '2025-09-30 22:48:31.343374', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:31.400849', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.11497477442026138, 'timestamp': '2025-09-30 22:48:31.404271', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.469111', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.10752493143081665, 'timestamp': '2025-09-30 22:48:31.472757', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:31.532059', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.10624190419912338, 'timestamp': '2025-09-30 22:48:31.535394', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.594983', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.060513127595186234, 'timestamp': '2025-09-30 22:48:31.602738', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:31.663708', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.05170396715402603, 'timestamp': '2025-09-30 22:48:31.667149', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:31.725722', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.09730762243270874, 'timestamp': '2025-09-30 22:48:31.729444', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.787129', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.07551898807287216, 'timestamp': '2025-09-30 22:48:31.791062', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.848977', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.20397642254829407, 'timestamp': '2025-09-30 22:48:31.854954', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:31.931283', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.14347819983959198, 'timestamp': '2025-09-30 22:48:31.934535', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:31.991273', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.04686451703310013, 'timestamp': '2025-09-30 22:48:32.001133', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.069469', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.01670198142528534, 'timestamp': '2025-09-30 22:48:32.073582', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:32.150981', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.04925605282187462, 'timestamp': '2025-09-30 22:48:32.158580', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:32.224511', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.09539780765771866, 'timestamp': '2025-09-30 22:48:32.227919', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:32.298613', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.025086967274546623, 'timestamp': '2025-09-30 22:48:32.301872', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.359741', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.02065298520028591, 'timestamp': '2025-09-30 22:48:32.363002', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:32.421243', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.03480549901723862, 'timestamp': '2025-09-30 22:48:32.428451', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:32.500666', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.08797311782836914, 'timestamp': '2025-09-30 22:48:32.504944', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:32.563166', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.04190852865576744, 'timestamp': '2025-09-30 22:48:32.567374', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.639400', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.0668877437710762, 'timestamp': '2025-09-30 22:48:32.652168', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:32.726064', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.1063104048371315, 'timestamp': '2025-09-30 22:48:32.732185', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:32.789718', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.026950271800160408, 'timestamp': '2025-09-30 22:48:32.793134', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.854611', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.07129385322332382, 'timestamp': '2025-09-30 22:48:32.858267', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.917792', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.06634702533483505, 'timestamp': '2025-09-30 22:48:32.924033', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:32.987392', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.04592851549386978, 'timestamp': '2025-09-30 22:48:32.995534', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.065575', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.14016954600811005, 'timestamp': '2025-09-30 22:48:33.071014', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.132308', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.029636794701218605, 'timestamp': '2025-09-30 22:48:33.135235', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:33.193765', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.050433430820703506, 'timestamp': '2025-09-30 22:48:33.197079', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:33.256168', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.06523530930280685, 'timestamp': '2025-09-30 22:48:33.262449', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:33.319607', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.11618956178426743, 'timestamp': '2025-09-30 22:48:33.324793', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.383193', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.07924305647611618, 'timestamp': '2025-09-30 22:48:33.388143', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:33.457293', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10114189982414246, 'timestamp': '2025-09-30 22:48:33.459947', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:33.538278', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.16147162020206451, 'timestamp': '2025-09-30 22:48:33.546092', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:33.604040', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.06099680811166763, 'timestamp': '2025-09-30 22:48:33.606616', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 22:48:33.663399', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.05067500099539757, 'timestamp': '2025-09-30 22:48:33.665961', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.724593', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.10130997002124786, 'timestamp': '2025-09-30 22:48:33.727353', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.786332', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.06375573575496674, 'timestamp': '2025-09-30 22:48:33.795108', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:33.855422', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.12391418218612671, 'timestamp': '2025-09-30 22:48:33.857905', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:33.916032', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.021352048963308334, 'timestamp': '2025-09-30 22:48:33.918521', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:33.992556', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.05800290033221245, 'timestamp': '2025-09-30 22:48:33.995969', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:34.054084', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.1229018047451973, 'timestamp': '2025-09-30 22:48:34.061861', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:34.119056', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.06783920526504517, 'timestamp': '2025-09-30 22:48:34.127400', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:34.195858', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.09537109732627869, 'timestamp': '2025-09-30 22:48:34.198663', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 3840023378688.0}, 'timestamp': '2025-09-30 22:48:34.263258', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.12458086013793945, 'timestamp': '2025-09-30 22:48:34.267735', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:34.328955', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.05492062866687775, 'timestamp': '2025-09-30 22:48:34.335567', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:34.395706', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.030012845993041992, 'timestamp': '2025-09-30 22:48:34.398847', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:34.470817', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.08629356324672699, 'timestamp': '2025-09-30 22:48:34.474658', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:34.532363', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.1844826638698578, 'timestamp': '2025-09-30 22:48:34.534926', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:34.592327', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.150527685880661, 'timestamp': '2025-09-30 22:48:34.598727', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:48:49.963614', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 8749.626339377068, 'timestamp': '2025-09-30 22:48:49.968447', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 3200019493504.0}, 'timestamp': '2025-09-30 22:48:50.028566', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.11833116412162781, 'timestamp': '2025-09-30 22:48:50.034400', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:50.094934', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.042562760412693024, 'timestamp': '2025-09-30 22:48:50.097877', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:50.158970', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.07314503192901611, 'timestamp': '2025-09-30 22:48:50.162662', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:50.234975', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.03141302615404129, 'timestamp': '2025-09-30 22:48:50.242044', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:50.300267', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.08018296211957932, 'timestamp': '2025-09-30 22:48:50.304904', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:50.365176', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.029588032513856888, 'timestamp': '2025-09-30 22:48:50.369472', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 22:48:50.445844', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.2430979162454605, 'timestamp': '2025-09-30 22:48:50.449452', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 22:48:50.508229', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.08685886114835739, 'timestamp': '2025-09-30 22:48:50.515803', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:50.573944', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.06718935817480087, 'timestamp': '2025-09-30 22:48:50.576631', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 22:48:50.650755', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.04474838450551033, 'timestamp': '2025-09-30 22:48:50.653969', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 22:48:50.712477', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.05088832229375839, 'timestamp': '2025-09-30 22:48:50.715349', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1040006410960.0}, 'timestamp': '2025-09-30 22:48:50.778998', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.003017884911969304, 'timestamp': '2025-09-30 22:48:50.796309', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3832594718208}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3193828943104}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2555063168000}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2874446055552}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2235680280448}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 958148730240}], 'timestamp': '2025-09-30 22:49:07.431953', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 8764.965955672631, 'timestamp': '2025-09-30 22:49:07.446524', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7503.194033846198, 'timestamp': '2025-09-30 22:49:07.449448', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 19106, 'timestamp': '2025-09-30 22:49:07.451492', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50797850157497600, 'timestamp': '2025-09-30 22:49:07.455029', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 5.161879486776446e+16, 'timestamp': '2025-09-30 22:49:07.460496', 'step': 22380, 'epoch': 3}